Example #1
// getChainLines parses a table's iptables-save data to find the chains declared in that table.
// It returns a map from utiliptables.Chain to that chain's declaration line from the save
// output, verbatim (including its packet/byte counters).
func getChainLines(table utiliptables.Table, save []byte) map[utiliptables.Chain]string {
	// get lines
	lines := strings.Split(string(save), "\n")
	chainsMap := make(map[utiliptables.Chain]string)
	tablePrefix := "*" + string(table)
	lineNum := 0
	// find beginning of table
	for ; lineNum < len(lines); lineNum++ {
		if strings.HasPrefix(strings.TrimSpace(lines[lineNum]), tablePrefix) {
			lineNum++
			break
		}
	}
	// parse table lines
	for ; lineNum < len(lines); lineNum++ {
		line := strings.TrimSpace(lines[lineNum])
		if strings.HasPrefix(line, "COMMIT") || strings.HasPrefix(line, "*") {
			break
		} else if len(line) == 0 || strings.HasPrefix(line, "#") {
			continue
		} else if strings.HasPrefix(line, ":") && len(line) > 1 {
			chain := utiliptables.Chain(strings.SplitN(line[1:], " ", 2)[0])
			chainsMap[chain] = lines[lineNum]
		}
	}
	return chainsMap
}
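A quick usage sketch (the save bytes and chain name here are hypothetical; the call assumes the package's existing utiliptables import):

	// Hypothetical iptables-save output for the nat table.
	save := []byte("*nat\n:PREROUTING ACCEPT [0:0]\n:KUBE-SERVICES - [0:0]\nCOMMIT\n")
	chains := getChainLines(utiliptables.TableNAT, save)
	// chains[utiliptables.Chain("KUBE-SERVICES")] == ":KUBE-SERVICES - [0:0]"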
Example #2
// This is the same as servicePortChainName but with the endpoint included.
func servicePortEndpointChainName(s proxy.ServicePortName, protocol string, endpoint string) utiliptables.Chain {
	hash := sha256.Sum256([]byte(s.String() + protocol + endpoint))
	encoded := base32.StdEncoding.EncodeToString(hash[:])
	return utiliptables.Chain("KUBE-SEP-" + encoded[:16])
}
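A minimal standalone sketch (standard library only; the service, protocol, and endpoint values are made up) showing why the truncation is safe: the name is deterministic for a given input triple, and "KUBE-SEP-" plus 16 base32 characters is 25 characters, under iptables' 28-character chain-name limit.

	package main

	import (
		"crypto/sha256"
		"encoding/base32"
		"fmt"
	)

	func main() {
		// Hypothetical inputs: service port name + protocol + endpoint.
		hash := sha256.Sum256([]byte("default/nginx:http" + "tcp" + "10.0.0.5:8080"))
		encoded := base32.StdEncoding.EncodeToString(hash[:])
		chain := "KUBE-SEP-" + encoded[:16]
		fmt.Println(chain, len(chain)) // always 25 characters
	}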
Example #3
// This is where all of the iptables-save/restore calls happen.
// The only other iptables rules are those that are set up in iptablesInit().
// Assumes proxier.mu is held.
func (proxier *Proxier) syncProxyRules() {
	// don't sync rules till we've received services and endpoints
	if !proxier.haveReceivedEndpointsUpdate || !proxier.haveReceivedServiceUpdate {
		glog.V(2).Info("Not syncing iptables until Services and Endpoints have been received from master")
		return
	}
	glog.V(3).Infof("Syncing iptables rules")

	// Ensure the services chain exists, then link it into OUTPUT and PREROUTING.
	if _, err := proxier.iptables.EnsureChain(utiliptables.TableNAT, iptablesServicesChain); err != nil {
		glog.Errorf("Failed to ensure that chain %s exists: %v", iptablesServicesChain, err)
		return
	}
	inputChains := []utiliptables.Chain{utiliptables.ChainOutput, utiliptables.ChainPrerouting}
	for _, chain := range inputChains {
		comment := "kubernetes service portals"
		args := []string{"-m", "comment", "--comment", comment, "-j", string(iptablesServicesChain)}
		if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, chain, args...); err != nil {
			glog.Errorf("Failed to ensure that chain %s jumps to %s: %v", chain, iptablesServicesChain, err)
			return
		}
	}
	// Ensure the POSTROUTING rule that MASQUERADEs traffic marked for SNAT.
	{
		comment := "kubernetes service traffic requiring SNAT"
		args := []string{"-m", "comment", "--comment", comment, "-m", "mark", "--mark", iptablesMasqueradeMark, "-j", "MASQUERADE"}
		if _, err := proxier.iptables.EnsureRule(utiliptables.Append, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
			glog.Errorf("Failed to ensure that chain %s obeys MASQUERADE mark: %v", utiliptables.ChainPostrouting, err)
			return
		}
	}

	// Get iptables-save output so we can check for existing chains and rules.
	// This will be a map of chain name to that chain's declaration line as
	// stored by iptables-save/iptables-restore.
	existingChains := make(map[utiliptables.Chain]string)
	iptablesSaveRaw, err := proxier.iptables.Save(utiliptables.TableNAT)
	if err != nil { // if we failed to get any rules
		glog.Errorf("Failed to execute iptables-save, syncing all rules: %v", err)
	} else { // otherwise parse the output
		existingChains = getChainLines(utiliptables.TableNAT, iptablesSaveRaw)
	}

	chainsLines := bytes.NewBuffer(nil)
	rulesLines := bytes.NewBuffer(nil)

	// Write table header.
	writeLine(chainsLines, "*nat")

	// Make sure we keep stats for the top-level chains, if they existed
	// (which they should have because we created them above).
	if chain, ok := existingChains[iptablesServicesChain]; ok {
		writeLine(chainsLines, chain)
	} else {
		writeLine(chainsLines, makeChainLine(iptablesServicesChain))
	}
	if chain, ok := existingChains[iptablesNodePortsChain]; ok {
		writeLine(chainsLines, chain)
	} else {
		writeLine(chainsLines, makeChainLine(iptablesNodePortsChain))
	}

	// Accumulate chains to keep.
	activeChains := map[utiliptables.Chain]bool{} // use a map as a set

	// Accumulate new local ports that we have opened.
	newLocalPorts := map[localPort]closeable{}

	// Build rules for each service.
	for svcName, svcInfo := range proxier.serviceMap {
		protocol := strings.ToLower(string(svcInfo.protocol))

		// Create the per-service chain, retaining counters if possible.
		svcChain := servicePortChainName(svcName, protocol)
		if chain, ok := existingChains[svcChain]; ok {
			writeLine(chainsLines, chain)
		} else {
			writeLine(chainsLines, makeChainLine(svcChain))
		}
		activeChains[svcChain] = true

		// Capture the clusterIP.
		args := []string{
			"-A", string(iptablesServicesChain),
			"-m", "comment", "--comment", fmt.Sprintf("\"%s cluster IP\"", svcName.String()),
			"-m", protocol, "-p", protocol,
			"-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()),
			"--dport", fmt.Sprintf("%d", svcInfo.port),
		}
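		// --set-xmark only tags the packet; the POSTROUTING rule ensured
		// above matches this mark and MASQUERADEs (SNATs) the traffic as it
		// leaves the host.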
		if proxier.masqueradeAll {
			writeLine(rulesLines, append(args,
				"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
		}
		writeLine(rulesLines, append(args,
			"-j", string(svcChain))...)

		// Capture externalIPs.
		for _, externalIP := range svcInfo.externalIPs {
			// If the "external" IP happens to be an IP that is local to this
			// machine, hold the local port open so no other process can open it
			// (because the socket might open but it would never work).
			if local, err := isLocalIP(externalIP); err != nil {
				glog.Errorf("can't determine if IP is local, assuming not: %v", err)
			} else if local {
				lp := localPort{
					desc:     "externalIP for " + svcName.String(),
					ip:       externalIP,
					port:     svcInfo.port,
					protocol: protocol,
				}
				if proxier.portsMap[lp] != nil {
					newLocalPorts[lp] = proxier.portsMap[lp]
				} else {
					socket, err := openLocalPort(&lp)
					if err != nil {
						glog.Errorf("can't open %s, skipping this externalIP: %v", lp.String(), err)
						continue
					}
					newLocalPorts[lp] = socket
				}
			} // We're holding the port, so it's OK to install iptables rules.
			args := []string{
				"-A", string(iptablesServicesChain),
				"-m", "comment", "--comment", fmt.Sprintf("\"%s external IP\"", svcName.String()),
				"-m", protocol, "-p", protocol,
				"-d", fmt.Sprintf("%s/32", externalIP),
				"--dport", fmt.Sprintf("%d", svcInfo.port),
			}
			// We have to SNAT packets to external IPs.
			writeLine(rulesLines, append(args,
				"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)

			// Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
			// nor from a local process to be forwarded to the service.
			// This rule roughly translates to "all traffic from off-machine".
			// This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later.
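			// Note: the appends below may reuse args' backing array; this is
			// safe only because writeLine consumes each derived slice before
			// the next append from args happens.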
			externalTrafficOnlyArgs := append(args,
				"-m", "physdev", "!", "--physdev-is-in",
				"-m", "addrtype", "!", "--src-type", "LOCAL")
			writeLine(rulesLines, append(externalTrafficOnlyArgs,
				"-j", string(svcChain))...)
			dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
			// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
			// This covers cases like GCE load-balancers which get added to the local routing table.
			writeLine(rulesLines, append(dstLocalOnlyArgs,
				"-j", string(svcChain))...)
		}

		// Capture load-balancer ingress.
		for _, ingress := range svcInfo.loadBalancerStatus.Ingress {
			if ingress.IP != "" {
				args := []string{
					"-A", string(iptablesServicesChain),
					"-m", "comment", "--comment", fmt.Sprintf("\"%s loadbalancer IP\"", svcName.String()),
					"-m", protocol, "-p", protocol,
					"-d", fmt.Sprintf("%s/32", ingress.IP),
					"--dport", fmt.Sprintf("%d", svcInfo.port),
				}
				// We have to SNAT packets from external IPs.
				writeLine(rulesLines, append(args,
					"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
				writeLine(rulesLines, append(args,
					"-j", string(svcChain))...)
			}
		}

		// Capture nodeports.  If we had more than 2 rules it might be
		// worthwhile to make a new per-service chain for nodeport rules, but
		// with just 2 rules it ends up being a waste and a cognitive burden.
		if svcInfo.nodePort != 0 {
			// Hold the local port open so no other process can open it
			// (because the socket might open but it would never work).
			lp := localPort{
				desc:     "nodePort for " + svcName.String(),
				ip:       "",
				port:     svcInfo.nodePort,
				protocol: protocol,
			}
			if proxier.portsMap[lp] != nil {
				newLocalPorts[lp] = proxier.portsMap[lp]
			} else {
				socket, err := openLocalPort(&lp)
				if err != nil {
					glog.Errorf("can't open %s, skipping this nodePort: %v", lp.String(), err)
					continue
				}
				newLocalPorts[lp] = socket
			} // We're holding the port, so it's OK to install iptables rules.
			// Nodeports need SNAT.
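			// (Without SNAT, an endpoint on another node would reply to the
			// client from its own address rather than this node's, and the
			// client would drop the mismatched reply.)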
			writeLine(rulesLines,
				"-A", string(iptablesNodePortsChain),
				"-m", "comment", "--comment", svcName.String(),
				"-m", protocol, "-p", protocol,
				"--dport", fmt.Sprintf("%d", svcInfo.nodePort),
				"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))
			// Jump to the service chain.
			writeLine(rulesLines,
				"-A", string(iptablesNodePortsChain),
				"-m", "comment", "--comment", svcName.String(),
				"-m", protocol, "-p", protocol,
				"--dport", fmt.Sprintf("%d", svcInfo.nodePort),
				"-j", string(svcChain))
		}

		// Generate the per-endpoint chains.  We do this in multiple passes so we
		// can group rules together.
		endpoints := make([]string, 0)
		endpointChains := make([]utiliptables.Chain, 0)
		for _, ep := range proxier.endpointsMap[svcName] {
			endpoints = append(endpoints, ep)
			endpointChain := servicePortEndpointChainName(svcName, protocol, ep)
			endpointChains = append(endpointChains, endpointChain)

			// Create the endpoint chain, retaining counters if possible.
			if chain, ok := existingChains[endpointChain]; ok {
				writeLine(chainsLines, chain)
			} else {
				writeLine(chainsLines, makeChainLine(endpointChain))
			}
			activeChains[endpointChain] = true
		}

		// First write session affinity rules, if applicable.
		if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
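			// The "recent" match keeps a per-endpoint-chain list of client IPs:
			// --rcheck tests whether this packet's source IP is on the list
			// (i.e. was recently routed to this endpoint), --seconds bounds how
			// old the entry may be, and --reap expires entries older than that.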
			for _, endpointChain := range endpointChains {
				writeLine(rulesLines,
					"-A", string(svcChain),
					"-m", "comment", "--comment", svcName.String(),
					"-m", "recent", "--name", string(endpointChain),
					"--rcheck", "--seconds", fmt.Sprintf("%d", svcInfo.stickyMaxAgeSeconds), "--reap",
					"-j", string(endpointChain))
			}
		}

		// Now write loadbalancing & DNAT rules.
		n := len(endpointChains)
		for i, endpointChain := range endpointChains {
			// Balancing rules in the per-service chain.
			args := []string{
				"-A", string(svcChain),
				"-m", "comment", "--comment", svcName.String(),
			}
			if i < (n - 1) {
				// Each rule is a probabilistic match.
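				// With n-i chains remaining, matching here with probability
				// 1/(n-i) makes every endpoint equally likely overall:
				// P(reaching rule i) = (n-i)/n, times 1/(n-i), is 1/n.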
				args = append(args,
					"-m", "statistic",
					"--mode", "random",
					"--probability", fmt.Sprintf("%0.5f", 1.0/float64(n-i)))
			}
			// The final (or only if n == 1) rule is a guaranteed match.
			args = append(args, "-j", string(endpointChain))
			writeLine(rulesLines, args...)

			// Rules in the per-endpoint chain.
			args = []string{
				"-A", string(endpointChain),
				"-m", "comment", "--comment", svcName.String(),
			}
			// Handle traffic that loops back to the originator with SNAT.
			// Technically we only need to do this if the endpoint is on this
			// host, but we don't have that information, so we just do this for
			// all endpoints.
			// TODO: if we grow logic to get this node's pod CIDR, we can use it.
			writeLine(rulesLines, append(args,
				"-s", fmt.Sprintf("%s/32", strings.Split(endpoints[i], ":")[0]),
				"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)

			// Update client-affinity lists.
			if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
				args = append(args, "-m", "recent", "--name", string(endpointChain), "--set")
			}
			// DNAT to final destination.
			args = append(args,
				"-m", protocol, "-p", protocol,
				"-j", "DNAT", "--to-destination", endpoints[i])
			writeLine(rulesLines, args...)
		}
	}

	// Delete chains no longer in use.
	for chain := range existingChains {
		if !activeChains[chain] {
			chainString := string(chain)
			if !strings.HasPrefix(chainString, "KUBE-SVC-") && !strings.HasPrefix(chainString, "KUBE-SEP-") {
				// Ignore chains that aren't ours.
				continue
			}
			// We must (as per iptables) write a chain-line for it, which has
			// the nice effect of flushing the chain.  Then we can remove the
			// chain.
			writeLine(chainsLines, existingChains[chain])
			writeLine(rulesLines, "-X", chainString)
		}
	}

	// Finally, tail-call to the nodeports chain.  This needs to be after all
	// other service portal rules.
	writeLine(rulesLines,
		"-A", string(iptablesServicesChain),
		"-m", "comment", "--comment", "\"kubernetes service nodeports; NOTE: this must be the last rule in this chain\"",
		"-m", "addrtype", "--dst-type", "LOCAL",
		"-j", string(iptablesNodePortsChain))

	// Write the end-of-table marker.
	writeLine(rulesLines, "COMMIT")

	// Sync rules.
	// NOTE: NoFlushTables is used so we don't flush non-kubernetes chains in the table.
	lines := append(chainsLines.Bytes(), rulesLines.Bytes()...)
	glog.V(3).Infof("Syncing rules: %s", lines)
	err = proxier.iptables.Restore(utiliptables.TableNAT, lines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
	if err != nil {
		glog.Errorf("Failed to sync iptables rules: %v", err)
		// Revert new local ports.
		for k, v := range newLocalPorts {
			glog.Errorf("Closing local port %s", k.String())
			v.Close()
		}
	} else {
		// Close old local ports and save new ones.
		for k, v := range proxier.portsMap {
			if newLocalPorts[k] == nil {
				v.Close()
			}
		}
		proxier.portsMap = newLocalPorts
	}
}
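The writeLine and makeChainLine helpers are not shown in this example; a minimal sketch consistent with how they are called above (an assumption, not necessarily the package's actual source):

	// writeLine joins words with spaces and appends the line to the buffer.
	func writeLine(buf *bytes.Buffer, words ...string) {
		buf.WriteString(strings.Join(words, " ") + "\n")
	}

	// makeChainLine returns an iptables-save-style chain declaration with
	// zeroed counters.
	func makeChainLine(chain utiliptables.Chain) string {
		return fmt.Sprintf(":%s - [0:0]", chain)
	}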
Example #4
func TestGetChainLinesMultipleTables(t *testing.T) {
	iptablesSave := `# Generated by iptables-save v1.4.21 on Fri Aug  7 14:47:37 2015
*nat
:PREROUTING ACCEPT [2:138]
:INPUT ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
:POSTROUTING ACCEPT [0:0]
:DOCKER - [0:0]
:KUBE-NODEPORT-CONTAINER - [0:0]
:KUBE-NODEPORT-HOST - [0:0]
:KUBE-PORTALS-CONTAINER - [0:0]
:KUBE-PORTALS-HOST - [0:0]
:KUBE-SVC-Dgkr8H9s4LQ2mn-Py5U - [0:0]
:KUBE-SVC-PknUqKuv-LNZiCKRqGm - [0:0]
:KUBE-SVC-RWEx6uDf8yWGww1OQ8E - [0:0]
:KUBE-SVC-UvIpe7oTYVlacW1-G4C - [0:0]
:KUBE-SVC-g_TrwxBdTXDbEtecmNo - [0:0]
:KUBE-SVC-gvTMDzeC8lcXUan15iP - [0:0]
-A PREROUTING -m comment --comment "handle ClusterIPs; NOTE: this must be before the NodePort rules" -j KUBE-PORTALS-CONTAINER
-A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER
-A PREROUTING -m addrtype --dst-type LOCAL -m comment --comment "handle service NodePorts; NOTE: this must be the last rule in the chain" -j KUBE-NODEPORT-CONTAINER
-A OUTPUT -m comment --comment "handle ClusterIPs; NOTE: this must be before the NodePort rules" -j KUBE-PORTALS-HOST
-A OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j DOCKER
-A OUTPUT -m addrtype --dst-type LOCAL -m comment --comment "handle service NodePorts; NOTE: this must be the last rule in the chain" -j KUBE-NODEPORT-HOST
-A POSTROUTING -s 10.246.1.0/24 ! -o cbr0 -j MASQUERADE
-A POSTROUTING -s 10.0.2.15/32 -d 10.0.2.15/32 -m comment --comment "handle pod connecting to self" -j MASQUERADE
-A KUBE-PORTALS-CONTAINER -d 10.247.0.1/32 -p tcp -m comment --comment "portal for default/kubernetes:" -m state --state NEW -m tcp --dport 443 -j KUBE-SVC-g_TrwxBdTXDbEtecmNo
-A KUBE-PORTALS-CONTAINER -d 10.247.0.10/32 -p udp -m comment --comment "portal for kube-system/kube-dns:dns" -m state --state NEW -m udp --dport 53 -j KUBE-SVC-gvTMDzeC8lcXUan15iP
-A KUBE-PORTALS-CONTAINER -d 10.247.0.10/32 -p tcp -m comment --comment "portal for kube-system/kube-dns:dns-tcp" -m state --state NEW -m tcp --dport 53 -j KUBE-SVC-PknUqKuv-LNZiCKRqGm
-A KUBE-PORTALS-HOST -d 10.247.0.1/32 -p tcp -m comment --comment "portal for default/kubernetes:" -m state --state NEW -m tcp --dport 443 -j KUBE-SVC-g_TrwxBdTXDbEtecmNo
-A KUBE-PORTALS-HOST -d 10.247.0.10/32 -p udp -m comment --comment "portal for kube-system/kube-dns:dns" -m state --state NEW -m udp --dport 53 -j KUBE-SVC-gvTMDzeC8lcXUan15iP
-A KUBE-PORTALS-HOST -d 10.247.0.10/32 -p tcp -m comment --comment "portal for kube-system/kube-dns:dns-tcp" -m state --state NEW -m tcp --dport 53 -j KUBE-SVC-PknUqKuv-LNZiCKRqGm
-A KUBE-SVC-Dgkr8H9s4LQ2mn-Py5U -p udp -m comment --comment "kube-system/kube-dns:dns" -m recent --set --name KUBE-SVC-Dgkr8H9s4LQ2mn-Py5U --mask 255.255.255.255 --rsource -j DNAT --to-destination 10.246.1.2:53
-A KUBE-SVC-PknUqKuv-LNZiCKRqGm -m comment --comment "kube-system/kube-dns:dns-tcp" -j KUBE-SVC-RWEx6uDf8yWGww1OQ8E
-A KUBE-SVC-RWEx6uDf8yWGww1OQ8E -p tcp -m comment --comment "kube-system/kube-dns:dns-tcp" -m recent --set --name KUBE-SVC-RWEx6uDf8yWGww1OQ8E --mask 255.255.255.255 --rsource -j DNAT --to-destination 10.246.1.2:53
-A KUBE-SVC-UvIpe7oTYVlacW1-G4C -p tcp -m comment --comment "default/kubernetes:" -m recent --set --name KUBE-SVC-UvIpe7oTYVlacW1-G4C --mask 255.255.255.255 --rsource -j DNAT --to-destination 10.245.1.2:443
-A KUBE-SVC-g_TrwxBdTXDbEtecmNo -m comment --comment "default/kubernetes:" -j KUBE-SVC-UvIpe7oTYVlacW1-G4C
-A KUBE-SVC-gvTMDzeC8lcXUan15iP -m comment --comment "kube-system/kube-dns:dns" -j KUBE-SVC-Dgkr8H9s4LQ2mn-Py5U
COMMIT
# Completed on Fri Aug  7 14:47:37 2015
# Generated by iptables-save v1.4.21 on Fri Aug  7 14:47:37 2015
*filter
:INPUT ACCEPT [17514:83115836]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [8909:688225]
:DOCKER - [0:0]
-A FORWARD -o cbr0 -j DOCKER
-A FORWARD -o cbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A FORWARD -i cbr0 ! -o cbr0 -j ACCEPT
-A FORWARD -i cbr0 -o cbr0 -j ACCEPT
COMMIT
`
	expected := map[utiliptables.Chain]string{
		utiliptables.ChainPrerouting:                       ":PREROUTING ACCEPT [2:138]",
		utiliptables.Chain("INPUT"):                        ":INPUT ACCEPT [0:0]",
		utiliptables.Chain("OUTPUT"):                       ":OUTPUT ACCEPT [0:0]",
		utiliptables.ChainPostrouting:                      ":POSTROUTING ACCEPT [0:0]",
		utiliptables.Chain("DOCKER"):                       ":DOCKER - [0:0]",
		utiliptables.Chain("KUBE-NODEPORT-CONTAINER"):      ":KUBE-NODEPORT-CONTAINER - [0:0]",
		utiliptables.Chain("KUBE-NODEPORT-HOST"):           ":KUBE-NODEPORT-HOST - [0:0]",
		utiliptables.Chain("KUBE-PORTALS-CONTAINER"):       ":KUBE-PORTALS-CONTAINER - [0:0]",
		utiliptables.Chain("KUBE-PORTALS-HOST"):            ":KUBE-PORTALS-HOST - [0:0]",
		utiliptables.Chain("KUBE-SVC-Dgkr8H9s4LQ2mn-Py5U"): ":KUBE-SVC-Dgkr8H9s4LQ2mn-Py5U - [0:0]",
		utiliptables.Chain("KUBE-SVC-PknUqKuv-LNZiCKRqGm"): ":KUBE-SVC-PknUqKuv-LNZiCKRqGm - [0:0]",
		utiliptables.Chain("KUBE-SVC-RWEx6uDf8yWGww1OQ8E"): ":KUBE-SVC-RWEx6uDf8yWGww1OQ8E - [0:0]",
		utiliptables.Chain("KUBE-SVC-UvIpe7oTYVlacW1-G4C"): ":KUBE-SVC-UvIpe7oTYVlacW1-G4C - [0:0]",
		utiliptables.Chain("KUBE-SVC-g_TrwxBdTXDbEtecmNo"): ":KUBE-SVC-g_TrwxBdTXDbEtecmNo - [0:0]",
		utiliptables.Chain("KUBE-SVC-gvTMDzeC8lcXUan15iP"): ":KUBE-SVC-gvTMDzeC8lcXUan15iP - [0:0]",
	}
	checkAllLines(t, utiliptables.TableNAT, []byte(iptablesSave), expected)
}