Beispiel #1
0
// NewProxier returns a new Proxier given an iptables Interface instance.
// Because of the iptables logic, it is assumed that there is only a single Proxier active on a machine.
// An error will be returned if iptables fails to update or acquire the initial lock.
// Once a proxier is created, it will keep iptables up to date in the background and
// will not terminate if a particular iptables call fails.
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int) (*Proxier, error) {
	// Set the route_localnet sysctl we need for
	if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
		return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
	}

	// Load the module.  It's OK if this fails (e.g. the module is not present)
	// because we'll catch the error on the sysctl, which is what we actually
	// care about.
	exec.Command("modprobe", "br-netfilter").CombinedOutput()
	if err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1); err != nil {
		glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
	}

	// Generate the masquerade mark to use for SNAT rules.
	if masqueradeBit < 0 || masqueradeBit > 31 {
		return nil, fmt.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", masqueradeBit)
	}
	masqueradeValue := 1 << uint(masqueradeBit)
	masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)

	return &Proxier{
		serviceMap:     make(map[proxy.ServicePortName]*serviceInfo),
		endpointsMap:   make(map[proxy.ServicePortName][]string),
		portsMap:       make(map[localPort]closeable),
		syncPeriod:     syncPeriod,
		iptables:       ipt,
		masqueradeAll:  masqueradeAll,
		masqueradeMark: masqueradeMark,
	}, nil
}
Beispiel #2
0
func StartNode(nodeConfig configapi.NodeConfig) error {
	config, err := kubernetes.BuildKubernetesNodeConfig(nodeConfig)
	if err != nil {
		return err
	}
	glog.Infof("Starting node %s (%s)", config.KubeletServer.HostnameOverride, version.Get().String())

	// preconditions
	config.EnsureVolumeDir()
	config.EnsureDocker(docker.NewHelper())

	// async starts
	config.RunKubelet()
	config.RunSDN()
	config.RunProxy()

	// HACK: RunProxy resets bridge-nf-call-iptables from what openshift-sdn requires
	if config.SDNPlugin != nil {
		sdnPluginName := nodeConfig.NetworkConfig.NetworkPluginName
		if sdnPluginName == osdn.SingleTenantPluginName() || sdnPluginName == osdn.MultiTenantPluginName() {
			if err := sysctl.SetSysctl("net/bridge/bridge-nf-call-iptables", 0); err != nil {
				glog.Warningf("Could not set net.bridge.bridge-nf-call-iptables sysctl: %s", err)
			}
		}
	}

	return nil
}
// setupKernelTunables validates kernel tunable flags are set as expected
// depending upon the specified option, it will either warn, error, or modify the kernel tunable flags
func setupKernelTunables(option KernelTunableBehavior) error {
	desiredState := map[string]int{
		utilsysctl.VmOvercommitMemory: utilsysctl.VmOvercommitMemoryAlways,
		utilsysctl.VmPanicOnOOM:       utilsysctl.VmPanicOnOOMInvokeOOMKiller,
		utilsysctl.KernelPanic:        utilsysctl.KernelPanicRebootTimeout,
		utilsysctl.KernelPanicOnOops:  utilsysctl.KernelPanicOnOopsAlways,
	}

	errList := []error{}
	for flag, expectedValue := range desiredState {
		val, err := utilsysctl.GetSysctl(flag)
		if err != nil {
			errList = append(errList, err)
			continue
		}
		if val == expectedValue {
			continue
		}

		switch option {
		case KernelTunableError:
			errList = append(errList, fmt.Errorf("Invalid kernel flag: %v, expected value: %v, actual value: %v", flag, expectedValue, val))
		case KernelTunableWarn:
			glog.V(2).Infof("Invalid kernel flag: %v, expected value: %v, actual value: %v", flag, expectedValue, val)
		case KernelTunableModify:
			glog.V(2).Infof("Updating kernel flag: %v, expected value: %v, actual value: %v", flag, expectedValue, val)
			err = utilsysctl.SetSysctl(flag, expectedValue)
			if err != nil {
				errList = append(errList, err)
			}
		}
	}
	return utilerrors.NewAggregate(errList)
}
Beispiel #4
0
func (plugin *kubenetNetworkPlugin) Init(host network.Host) error {
	plugin.host = host
	plugin.cniConfig = &libcni.CNIConfig{
		Path: []string{DefaultCNIDir},
	}

	if link, err := findMinMTU(); err == nil {
		plugin.MTU = link.MTU
		glog.V(5).Infof("Using interface %s MTU %d as bridge MTU", link.Name, link.MTU)
	} else {
		glog.Warningf("Failed to find default bridge MTU: %v", err)
	}

	// Since this plugin uses a Linux bridge, set bridge-nf-call-iptables=1
	// is necessary to ensure kube-proxy functions correctly.
	//
	// This will return an error on older kernel version (< 3.18) as the module
	// was built-in, we simply ignore the error here. A better thing to do is
	// to check the kernel version in the future.
	utilexec.New().Command("modprobe", "br-netfilter").CombinedOutput()
	if err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1); err != nil {
		glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
	}

	return nil
}
Beispiel #5
0
// NewProxier returns a new Proxier given an iptables Interface instance.
// Because of the iptables logic, it is assumed that there is only a single Proxier active on a machine.
// An error will be returned if iptables fails to update or acquire the initial lock.
// Once a proxier is created, it will keep iptables up to date in the background and
// will not terminate if a particular iptables call fails.
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR string) (*Proxier, error) {
	// Set the route_localnet sysctl we need for
	if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
		return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
	}

	// Proxy needs br_netfilter and bridge-nf-call-iptables=1 when containers
	// are connected to a Linux bridge (but not SDN bridges).  Until most
	// plugins handle this, log when config is missing
	if val, err := utilsysctl.GetSysctl(sysctlBridgeCallIptables); err == nil && val != 1 {
		glog.Infof("missing br-netfilter module or unset sysctl br-nf-call-iptables; proxy may not work as intended")
	}

	// Generate the masquerade mark to use for SNAT rules.
	if masqueradeBit < 0 || masqueradeBit > 31 {
		return nil, fmt.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", masqueradeBit)
	}
	masqueradeValue := 1 << uint(masqueradeBit)
	masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)

	return &Proxier{
		serviceMap:     make(map[proxy.ServicePortName]*serviceInfo),
		endpointsMap:   make(map[proxy.ServicePortName][]string),
		portsMap:       make(map[localPort]closeable),
		syncPeriod:     syncPeriod,
		iptables:       ipt,
		masqueradeAll:  masqueradeAll,
		masqueradeMark: masqueradeMark,
		exec:           exec,
		clusterCIDR:    clusterCIDR,
	}, nil
}
Beispiel #6
0
// ResetSysctlFromProxy resets the bridge-nf-call-iptables systctl that the Kube proxy sets, which
// is required for normal Docker containers to talk to the SDN plugin on the local system.
// Resolution is https://github.com/kubernetes/kubernetes/pull/20647
func (c *NodeConfig) ResetSysctlFromProxy() {
	if c.SDNPlugin == nil {
		return
	}
	if err := sysctl.SetSysctl("net/bridge/bridge-nf-call-iptables", 0); err != nil {
		glog.Warningf("Could not set net.bridge.bridge-nf-call-iptables sysctl: %s", err)
	}
}
Beispiel #7
0
func (realConntracker) SetMax(max int) error {
	glog.Infof("Setting nf_conntrack_max to %d", max)
	if err := sysctl.SetSysctl("net/netfilter/nf_conntrack_max", max); err != nil {
		return err
	}
	// TODO: generify this and sysctl to a new sysfs.WriteInt()
	glog.Infof("Setting conntrack hashsize to %d", max/4)
	return ioutil.WriteFile("/sys/module/nf_conntrack/parameters/hashsize", []byte(strconv.Itoa(max/4)), 0640)
}
Beispiel #8
0
// changeSysctl changes the required network setting in /proc to get
// keepalived working in the local system.
func changeSysctl() error {
	for k, v := range sysctlAdjustments {
		if err := sysctl.SetSysctl(k, v); err != nil {
			return err
		}
	}

	return nil
}
Beispiel #9
0
// NewProxier returns a new Proxier given an iptables Interface instance.
// Because of the iptables logic, it is assumed that there is only a single Proxier active on a machine.
// An error will be returned if iptables fails to update or acquire the initial lock.
// Once a proxier is created, it will keep iptables up to date in the background and
// will not terminate if a particular iptables call fails.
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool) (*Proxier, error) {
	// Set the route_localnet sysctl we need for
	if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
		return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
	}

	// Load the module.  It's OK if this fails (e.g. the module is not present)
	// because we'll catch the error on the sysctl, which is what we actually
	// care about.
	exec.Command("modprobe", "br-netfilter").CombinedOutput()
	if err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1); err != nil {
		glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
	}

	return &Proxier{
		serviceMap:    make(map[proxy.ServicePortName]*serviceInfo),
		endpointsMap:  make(map[proxy.ServicePortName][]string),
		portsMap:      make(map[localPort]closeable),
		syncPeriod:    syncPeriod,
		iptables:      ipt,
		masqueradeAll: masqueradeAll,
	}, nil
}
// disableKernelMemoryOvercommitHandling tells the kernel to perform no memory over-commit handling.
// Under this setting, the potential for memory overload is increased, but so is performance for
// memory-intensive tasks
// sets /proc/sys/vm/overcommit_memory to 1
func disableKernelMemoryOvercommitHandling() error {
	val, err := utilsysctl.GetSysctl(sysctlVmOvercommitMemory)
	if err != nil {
		return err
	}
	if val == 1 {
		return nil
	}
	glog.V(2).Infof("Updating kernel memory overcommit flag from %v to %v", val, 1)
	err = utilsysctl.SetSysctl(sysctlVmOvercommitMemory, 1)
	if err != nil {
		return err
	}
	return nil
}
Beispiel #11
0
func (plugin *NoopNetworkPlugin) Init(host Host) error {
	// Set bridge-nf-call-iptables=1 to maintain compatibility with older
	// kubernetes versions to ensure the iptables-based kube proxy functions
	// correctly.  Other plugins are responsible for setting this correctly
	// depending on whether or not they connect containers to Linux bridges
	// or use some other mechanism (ie, SDN vswitch).

	// Ensure the netfilter module is loaded on kernel >= 3.18; previously
	// it was built-in.
	utilexec.New().Command("modprobe", "br-netfilter").CombinedOutput()
	if err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1); err != nil {
		glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
	}

	return nil
}
Beispiel #12
0
func (plugin *kubenetNetworkPlugin) Init(host network.Host, hairpinMode componentconfig.HairpinMode, nonMasqueradeCIDR string) error {
	plugin.host = host
	plugin.hairpinMode = hairpinMode
	plugin.nonMasqueradeCIDR = nonMasqueradeCIDR
	plugin.cniConfig = &libcni.CNIConfig{
		Path: []string{DefaultCNIDir, plugin.vendorDir},
	}

	if link, err := findMinMTU(); err == nil {
		plugin.MTU = link.MTU
		glog.V(5).Infof("Using interface %s MTU %d as bridge MTU", link.Name, link.MTU)
	} else {
		glog.Warningf("Failed to find default bridge MTU: %v", err)
	}

	// Since this plugin uses a Linux bridge, set bridge-nf-call-iptables=1
	// is necessary to ensure kube-proxy functions correctly.
	//
	// This will return an error on older kernel version (< 3.18) as the module
	// was built-in, we simply ignore the error here. A better thing to do is
	// to check the kernel version in the future.
	plugin.execer.Command("modprobe", "br-netfilter").CombinedOutput()
	err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1)
	if err != nil {
		glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
	}

	plugin.loConfig, err = libcni.ConfFromBytes([]byte(`{
  "cniVersion": "0.1.0",
  "name": "kubenet-loopback",
  "type": "loopback"
}`))
	if err != nil {
		return fmt.Errorf("Failed to generate loopback config: %v", err)
	}

	plugin.nsenterPath, err = plugin.execer.LookPath("nsenter")
	if err != nil {
		return fmt.Errorf("Failed to find nsenter binary: %v", err)
	}

	// Need to SNAT outbound traffic from cluster
	if err = plugin.ensureMasqRule(); err != nil {
		return err
	}
	return nil
}
Beispiel #13
0
func (realConntracker) SetMax(max int) error {
	glog.Infof("Setting nf_conntrack_max to %d", max)
	if err := sysctl.SetSysctl("net/netfilter/nf_conntrack_max", max); err != nil {
		return err
	}
	// sysfs is expected to be mounted as 'rw'. However, it may be unexpectedly mounted as
	// 'ro' by docker because of a known docker issue (https://github.com/docker/docker/issues/24000).
	// Setting conntrack will fail when sysfs is readonly. When that happens, we don't set conntrack
	// hashsize and return a special error readOnlySysFSError here. The caller should deal with
	// readOnlySysFSError differently.
	writable, err := isSysFSWritable()
	if err != nil {
		return err
	}
	if !writable {
		return readOnlySysFSError
	}
	// TODO: generify this and sysctl to a new sysfs.WriteInt()
	glog.Infof("Setting conntrack hashsize to %d", max/4)
	return ioutil.WriteFile("/sys/module/nf_conntrack/parameters/hashsize", []byte(strconv.Itoa(max/4)), 0640)
}
Beispiel #14
0
func (plugin *OsdnNode) SetupSDN() (bool, error) {
	clusterNetworkCIDR := plugin.networkInfo.ClusterNetwork.String()
	serviceNetworkCIDR := plugin.networkInfo.ServiceNetwork.String()

	localSubnetCIDR := plugin.localSubnetCIDR
	_, ipnet, err := net.ParseCIDR(localSubnetCIDR)
	localSubnetMaskLength, _ := ipnet.Mask.Size()
	localSubnetGateway := netutils.GenerateDefaultGateway(ipnet).String()

	glog.V(5).Infof("[SDN setup] node pod subnet %s gateway %s", ipnet.String(), localSubnetGateway)

	exec := kexec.New()

	if plugin.clearLbr0IptablesRule {
		// Delete docker's left-over lbr0 rule; cannot do this from
		// NewNodePlugin (where docker is cleaned up) because we need
		// localSubnetCIDR which is only valid after plugin start
		ipt := iptables.New(exec, utildbus.New(), iptables.ProtocolIpv4)
		ipt.DeleteRule(iptables.TableNAT, iptables.ChainPostrouting, "-s", localSubnetCIDR, "!", "-o", "lbr0", "-j", "MASQUERADE")
	}

	gwCIDR := fmt.Sprintf("%s/%d", localSubnetGateway, localSubnetMaskLength)
	if plugin.alreadySetUp(gwCIDR, clusterNetworkCIDR) {
		glog.V(5).Infof("[SDN setup] no SDN setup required")
		return false, nil
	}
	glog.V(5).Infof("[SDN setup] full SDN setup required")

	if err := os.MkdirAll("/run/openshift-sdn", 0700); err != nil {
		return false, err
	}
	config := fmt.Sprintf("export OPENSHIFT_CLUSTER_SUBNET=%s", clusterNetworkCIDR)
	err = ioutil.WriteFile("/run/openshift-sdn/config.env", []byte(config), 0644)
	if err != nil {
		return false, err
	}

	err = plugin.ovs.AddBridge("fail-mode=secure", "protocols=OpenFlow13")
	if err != nil {
		return false, err
	}
	_ = plugin.ovs.DeletePort(VXLAN)
	_, err = plugin.ovs.AddPort(VXLAN, 1, "type=vxlan", `options:remote_ip="flow"`, `options:key="flow"`)
	if err != nil {
		return false, err
	}
	_ = plugin.ovs.DeletePort(TUN)
	_, err = plugin.ovs.AddPort(TUN, 2, "type=internal")
	if err != nil {
		return false, err
	}

	otx := plugin.ovs.NewTransaction()
	// Table 0: initial dispatch based on in_port
	// vxlan0
	otx.AddFlow("table=0, priority=200, in_port=1, arp, nw_src=%s, nw_dst=%s, actions=move:NXM_NX_TUN_ID[0..31]->NXM_NX_REG0[],goto_table:1", clusterNetworkCIDR, localSubnetCIDR)
	otx.AddFlow("table=0, priority=200, in_port=1, ip, nw_src=%s, nw_dst=%s, actions=move:NXM_NX_TUN_ID[0..31]->NXM_NX_REG0[],goto_table:1", clusterNetworkCIDR, localSubnetCIDR)
	otx.AddFlow("table=0, priority=150, in_port=1, actions=drop")
	// tun0
	otx.AddFlow("table=0, priority=200, in_port=2, arp, nw_src=%s, nw_dst=%s, actions=goto_table:5", localSubnetGateway, clusterNetworkCIDR)
	otx.AddFlow("table=0, priority=200, in_port=2, ip, actions=goto_table:5")
	otx.AddFlow("table=0, priority=150, in_port=2, actions=drop")
	// else, from a container
	otx.AddFlow("table=0, priority=100, arp, actions=goto_table:2")
	otx.AddFlow("table=0, priority=100, ip, actions=goto_table:2")
	otx.AddFlow("table=0, priority=0, actions=drop")

	// Table 1: VXLAN ingress filtering; filled in by AddHostSubnetRules()
	// eg, "table=1, priority=100, tun_src=${remote_node_ip}, actions=goto_table:5"
	otx.AddFlow("table=1, priority=0, actions=drop")

	// Table 2: from OpenShift container; validate IP/MAC, assign tenant-id; filled in by openshift-sdn-ovs
	// eg, "table=2, priority=100, in_port=${ovs_port}, arp, nw_src=${ipaddr}, arp_sha=${macaddr}, actions=load:${tenant_id}->NXM_NX_REG0[], goto_table:5"
	//     "table=2, priority=100, in_port=${ovs_port}, ip, nw_src=${ipaddr}, actions=load:${tenant_id}->NXM_NX_REG0[], goto_table:3"
	// (${tenant_id} is always 0 for single-tenant)
	otx.AddFlow("table=2, priority=0, actions=drop")

	// Table 3: from OpenShift container; service vs non-service
	otx.AddFlow("table=3, priority=100, ip, nw_dst=%s, actions=goto_table:4", serviceNetworkCIDR)
	otx.AddFlow("table=3, priority=0, actions=goto_table:5")

	// Table 4: from OpenShift container; service dispatch; filled in by AddServiceRules()
	otx.AddFlow("table=4, priority=200, reg0=0, actions=output:2")
	// eg, "table=4, priority=100, reg0=${tenant_id}, ${service_proto}, nw_dst=${service_ip}, tp_dst=${service_port}, actions=output:2"
	otx.AddFlow("table=4, priority=0, actions=drop")

	// Table 5: general routing
	otx.AddFlow("table=5, priority=300, arp, nw_dst=%s, actions=output:2", localSubnetGateway)
	otx.AddFlow("table=5, priority=300, ip, nw_dst=%s, actions=output:2", localSubnetGateway)
	otx.AddFlow("table=5, priority=200, arp, nw_dst=%s, actions=goto_table:6", localSubnetCIDR)
	otx.AddFlow("table=5, priority=200, ip, nw_dst=%s, actions=goto_table:7", localSubnetCIDR)
	otx.AddFlow("table=5, priority=100, arp, nw_dst=%s, actions=goto_table:8", clusterNetworkCIDR)
	otx.AddFlow("table=5, priority=100, ip, nw_dst=%s, actions=goto_table:8", clusterNetworkCIDR)
	otx.AddFlow("table=5, priority=0, ip, actions=goto_table:9")
	otx.AddFlow("table=5, priority=0, arp, actions=drop")

	// Table 6: ARP to container, filled in by openshift-sdn-ovs
	// eg, "table=6, priority=100, arp, nw_dst=${container_ip}, actions=output:${ovs_port}"
	otx.AddFlow("table=6, priority=0, actions=drop")

	// Table 7: IP to container; filled in by openshift-sdn-ovs
	// eg, "table=7, priority=100, reg0=0, ip, nw_dst=${ipaddr}, actions=output:${ovs_port}"
	// eg, "table=7, priority=100, reg0=${tenant_id}, ip, nw_dst=${ipaddr}, actions=output:${ovs_port}"
	otx.AddFlow("table=7, priority=0, actions=drop")

	// Table 8: to remote container; filled in by AddHostSubnetRules()
	// eg, "table=8, priority=100, arp, nw_dst=${remote_subnet_cidr}, actions=move:NXM_NX_REG0[]->NXM_NX_TUN_ID[0..31], set_field:${remote_node_ip}->tun_dst,output:1"
	// eg, "table=8, priority=100, ip, nw_dst=${remote_subnet_cidr}, actions=move:NXM_NX_REG0[]->NXM_NX_TUN_ID[0..31], set_field:${remote_node_ip}->tun_dst,output:1"
	otx.AddFlow("table=8, priority=0, actions=drop")

	// Table 9: egress network policy dispatch; edited by updateEgressNetworkPolicyRules()
	// eg, "table=9, reg0=${tenant_id}, priority=2, ip, nw_dst=${external_cidr}, actions=drop
	otx.AddFlow("table=9, priority=0, actions=output:2")

	err = otx.EndTransaction()
	if err != nil {
		return false, err
	}

	itx := ipcmd.NewTransaction(exec, TUN)
	itx.AddAddress(gwCIDR)
	defer deleteLocalSubnetRoute(TUN, localSubnetCIDR)
	itx.SetLink("mtu", fmt.Sprint(plugin.mtu))
	itx.SetLink("up")
	itx.AddRoute(clusterNetworkCIDR, "proto", "kernel", "scope", "link")
	itx.AddRoute(serviceNetworkCIDR)
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	sysctl := sysctl.New()

	// Enable IP forwarding for ipv4 packets
	err = sysctl.SetSysctl("net/ipv4/ip_forward", 1)
	if err != nil {
		return false, fmt.Errorf("Could not enable IPv4 forwarding: %s", err)
	}
	err = sysctl.SetSysctl(fmt.Sprintf("net/ipv4/conf/%s/forwarding", TUN), 1)
	if err != nil {
		return false, fmt.Errorf("Could not enable IPv4 forwarding on %s: %s", TUN, err)
	}

	// Table 253: rule version; note action is hex bytes separated by '.'
	otx = plugin.ovs.NewTransaction()
	pluginVersion := getPluginVersion(plugin.multitenant)
	otx.AddFlow("%s, %s%s.%s", VERSION_TABLE, VERSION_ACTION, pluginVersion[0], pluginVersion[1])
	err = otx.EndTransaction()
	if err != nil {
		return false, err
	}

	return true, nil
}
Beispiel #15
0
func (c *FlowController) Setup(localSubnetCIDR, clusterNetworkCIDR, servicesNetworkCIDR string, mtu uint) (bool, error) {
	_, ipnet, err := net.ParseCIDR(localSubnetCIDR)
	localSubnetMaskLength, _ := ipnet.Mask.Size()
	localSubnetGateway := netutils.GenerateDefaultGateway(ipnet).String()

	glog.V(5).Infof("[SDN setup] node pod subnet %s gateway %s", ipnet.String(), localSubnetGateway)

	gwCIDR := fmt.Sprintf("%s/%d", localSubnetGateway, localSubnetMaskLength)
	if alreadySetUp(c.multitenant, gwCIDR) {
		glog.V(5).Infof("[SDN setup] no SDN setup required")
		return false, nil
	}
	glog.V(5).Infof("[SDN setup] full SDN setup required")

	mtuStr := fmt.Sprint(mtu)

	itx := ipcmd.NewTransaction(LBR)
	itx.SetLink("down")
	itx.IgnoreError()
	itx.DeleteLink()
	itx.IgnoreError()
	itx.AddLink("type", "bridge")
	itx.AddAddress(gwCIDR)
	itx.SetLink("up")
	err = itx.EndTransaction()
	if err != nil {
		glog.Errorf("Failed to configure docker bridge: %v", err)
		return false, err
	}
	defer deleteLocalSubnetRoute(LBR, localSubnetCIDR)

	glog.V(5).Infof("[SDN setup] docker setup %s mtu %s", LBR, mtuStr)
	out, err := exec.Command("openshift-sdn-docker-setup.sh", LBR, mtuStr).CombinedOutput()
	if err != nil {
		glog.Errorf("Failed to configure docker networking: %v\n%s", err, out)
		return false, err
	} else {
		glog.V(5).Infof("[SDN setup] docker setup success:\n%s", out)
	}

	config := fmt.Sprintf("export OPENSHIFT_CLUSTER_SUBNET=%s", clusterNetworkCIDR)
	err = ioutil.WriteFile("/run/openshift-sdn/config.env", []byte(config), 0644)
	if err != nil {
		return false, err
	}

	itx = ipcmd.NewTransaction(VLINUXBR)
	itx.DeleteLink()
	itx.IgnoreError()
	itx.AddLink("mtu", mtuStr, "type", "veth", "peer", "name", VOVSBR, "mtu", mtuStr)
	itx.SetLink("up")
	itx.SetLink("txqueuelen", "0")
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	itx = ipcmd.NewTransaction(VOVSBR)
	itx.SetLink("up")
	itx.SetLink("txqueuelen", "0")
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	itx = ipcmd.NewTransaction(LBR)
	itx.AddSlave(VLINUXBR)
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	otx := ovs.NewTransaction(BR)
	otx.AddBridge("fail-mode=secure", "protocols=OpenFlow13")
	otx.AddPort(VXLAN, 1, "type=vxlan", `options:remote_ip="flow"`, `options:key="flow"`)
	otx.AddPort(TUN, 2, "type=internal")
	otx.AddPort(VOVSBR, 3)

	// Table 0; VXLAN filtering; the first rule sends un-tunnelled packets
	// to table 1. Additional per-node rules are filled in by controller.go
	otx.AddFlow("table=0, tun_src=0.0.0.0, actions=goto_table:1")
	// eg, "table=0, tun_src=${remote_node}, actions=goto_table:1"

	// Table 1; learn MAC addresses and continue with table 2
	otx.AddFlow("table=1, actions=learn(table=9, priority=200, hard_timeout=900, NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], load:NXM_NX_TUN_IPV4_SRC[]->NXM_NX_TUN_IPV4_DST[], output:NXM_OF_IN_PORT[]), goto_table:2")

	// Table 2; initial dispatch
	otx.AddFlow("table=2, priority=200, arp, actions=goto_table:9")
	otx.AddFlow("table=2, priority=100, in_port=1, actions=goto_table:3") // vxlan0
	otx.AddFlow("table=2, priority=100, in_port=2, actions=goto_table:6") // tun0
	otx.AddFlow("table=2, priority=100, in_port=3, actions=goto_table:6") // vovsbr
	otx.AddFlow("table=2, priority=0, actions=goto_table:4")              // container

	// Table 3; incoming from vxlan
	otx.AddFlow("table=3, priority=200, ip, nw_dst=%s, actions=output:2", localSubnetGateway)
	if c.multitenant {
		otx.AddFlow("table=3, priority=100, ip, nw_dst=%s, actions=move:NXM_NX_TUN_ID[0..31]->NXM_NX_REG0[], goto_table:6", localSubnetCIDR)
	} else {
		otx.AddFlow("table=3, priority=100, ip, nw_dst=%s, actions=goto_table:9", localSubnetCIDR)
	}

	// Table 4; incoming from container; filled in by openshift-sdn-ovs
	// eg, single-tenant: "table=4, priority=100, in_port=${ovs_port}, ip, nw_src=${ipaddr}, goto_table:6"
	//     multitenant:   "table=4, priority=100, in_port=${ovs_port}, ip, nw_src=${ipaddr}, actions=load:${tenant_id}->NXM_NX_REG0[], goto_table:5"

	// Table 5; service isolation; mostly filled in by AddServiceOFRules()
	if c.multitenant {
		otx.AddFlow("table=5, priority=200, reg0=0, ip, nw_dst=%s, actions=output:2", servicesNetworkCIDR)
		// eg, "table=5, priority=200, ${service_proto}, nw_dst=${service_ip}, tp_dst=${service_port}, actions=output:2"
		otx.AddFlow("table=5, priority=100, ip, nw_dst=%s, actions=drop", servicesNetworkCIDR)
		otx.AddFlow("table=5, priority=0, actions=goto_table:6")
	}

	// Table 6; general routing
	otx.AddFlow("table=6, priority=200, ip, nw_dst=%s, actions=output:2", localSubnetGateway)
	if c.multitenant {
		otx.AddFlow("table=6, priority=175, ip, reg0=0, nw_dst=%s, actions=goto_table:9", localSubnetCIDR)
		otx.AddFlow("table=6, priority=150, ip, nw_dst=%s, actions=goto_table:7", localSubnetCIDR)
	} else {
		otx.AddFlow("table=6, priority=150, ip, nw_dst=%s, actions=goto_table:9", localSubnetCIDR)
	}
	otx.AddFlow("table=6, priority=100, ip, nw_dst=%s, actions=goto_table:8", clusterNetworkCIDR)
	otx.AddFlow("table=6, priority=0, ip, actions=output:2")

	// Table 7; to local container with isolation; filled in by openshift-sdn-ovs
	// eg, "table=7, priority=100, ip, nw_dst=${ipaddr}, reg0=${tenant_id}, actions=output:${ovs_port}"

	// Table 8; to remote container; filled in by AddOFRules()
	// eg, "table=8, priority=100, ip, nw_dst=${remote_subnet_cidr}, actions=move:NXM_NX_REG0[]->NXM_NX_TUN_ID[0..31], set_field:${remote_node_ip}->tun_dst,output:1"

	// Table 9; MAC dispatch / ARP, filled in by Table 1's learn() rule
	// and with per-node vxlan ARP rules by AddOFRules()
	otx.AddFlow("table=9, priority=0, arp, actions=flood")
	// eg, "table=9, priority=100, arp, nw_dst=${remote_subnet_cidr}, actions=move:NXM_NX_REG0[]->NXM_NX_TUN_ID[0..31], set_field:${remote_node_ip}->tun_dst,output:1"

	err = otx.EndTransaction()
	if err != nil {
		return false, err
	}

	itx = ipcmd.NewTransaction(TUN)
	itx.AddAddress(gwCIDR)
	defer deleteLocalSubnetRoute(TUN, localSubnetCIDR)
	itx.SetLink("mtu", mtuStr)
	itx.SetLink("up")
	itx.AddRoute(clusterNetworkCIDR, "proto", "kernel", "scope", "link")
	itx.AddRoute(servicesNetworkCIDR)
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	// Clean up docker0 since docker won't
	itx = ipcmd.NewTransaction("docker0")
	itx.SetLink("down")
	itx.IgnoreError()
	itx.DeleteLink()
	itx.IgnoreError()
	_ = itx.EndTransaction()

	// Disable iptables for linux bridges (and in particular lbr0), ignoring errors.
	// (This has to have been performed in advance for docker-in-docker deployments,
	// since this will fail there).
	_, _ = exec.Command("modprobe", "br_netfilter").CombinedOutput()
	err = sysctl.SetSysctl("net/bridge/bridge-nf-call-iptables", 0)
	if err != nil {
		glog.Warningf("Could not set net.bridge.bridge-nf-call-iptables sysctl: %s", err)
	} else {
		glog.V(5).Infof("[SDN setup] set net.bridge.bridge-nf-call-iptables to 0")
	}

	// Enable IP forwarding for ipv4 packets
	err = sysctl.SetSysctl("net/ipv4/ip_forward", 1)
	if err != nil {
		return false, fmt.Errorf("Could not enable IPv4 forwarding: %s", err)
	}
	err = sysctl.SetSysctl(fmt.Sprintf("net/ipv4/conf/%s/forwarding", TUN), 1)
	if err != nil {
		return false, fmt.Errorf("Could not enable IPv4 forwarding on %s: %s", TUN, err)
	}

	return true, nil
}
Beispiel #16
0
func (realConntracker) SetTCPEstablishedTimeout(seconds int) error {
	glog.Infof("Setting nf_conntrack_tcp_timeout_established to %d", seconds)
	return sysctl.SetSysctl("net/netfilter/nf_conntrack_tcp_timeout_established", seconds)
}
Beispiel #17
0
func (plugin *OsdnNode) SetupSDN() (bool, error) {
	localSubnetCIDR, err := plugin.getLocalSubnet()
	if err != nil {
		return false, err
	}
	clusterNetworkCIDR := plugin.networkInfo.ClusterNetwork.String()
	serviceNetworkCIDR := plugin.networkInfo.ServiceNetwork.String()

	_, ipnet, err := net.ParseCIDR(localSubnetCIDR)
	localSubnetMaskLength, _ := ipnet.Mask.Size()
	localSubnetGateway := netutils.GenerateDefaultGateway(ipnet).String()

	glog.V(5).Infof("[SDN setup] node pod subnet %s gateway %s", ipnet.String(), localSubnetGateway)

	gwCIDR := fmt.Sprintf("%s/%d", localSubnetGateway, localSubnetMaskLength)
	if plugin.alreadySetUp(gwCIDR, clusterNetworkCIDR) {
		glog.V(5).Infof("[SDN setup] no SDN setup required")
		return false, nil
	}
	glog.V(5).Infof("[SDN setup] full SDN setup required")

	mtuStr := fmt.Sprint(plugin.mtu)

	exec := kexec.New()
	itx := ipcmd.NewTransaction(exec, LBR)
	itx.SetLink("down")
	itx.IgnoreError()
	itx.DeleteLink()
	itx.IgnoreError()
	itx.AddLink("type", "bridge")
	itx.AddAddress(gwCIDR)
	itx.SetLink("up")
	err = itx.EndTransaction()
	if err != nil {
		glog.Errorf("Failed to configure docker bridge: %v", err)
		return false, err
	}
	defer deleteLocalSubnetRoute(LBR, localSubnetCIDR)

	glog.V(5).Infof("[SDN setup] docker setup %s mtu %s", LBR, mtuStr)
	out, err := exec.Command("openshift-sdn-docker-setup.sh", LBR, mtuStr).CombinedOutput()
	if err != nil {
		glog.Errorf("Failed to configure docker networking: %v\n%s", err, out)
		return false, err
	} else {
		glog.V(5).Infof("[SDN setup] docker setup success:\n%s", out)
	}

	config := fmt.Sprintf("export OPENSHIFT_CLUSTER_SUBNET=%s", clusterNetworkCIDR)
	err = ioutil.WriteFile("/run/openshift-sdn/config.env", []byte(config), 0644)
	if err != nil {
		return false, err
	}

	itx = ipcmd.NewTransaction(exec, VLINUXBR)
	itx.DeleteLink()
	itx.IgnoreError()
	itx.AddLink("mtu", mtuStr, "type", "veth", "peer", "name", VOVSBR, "mtu", mtuStr)
	itx.SetLink("up")
	itx.SetLink("txqueuelen", "0")
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	itx = ipcmd.NewTransaction(exec, VOVSBR)
	itx.SetLink("up")
	itx.SetLink("txqueuelen", "0")
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	itx = ipcmd.NewTransaction(exec, LBR)
	itx.AddSlave(VLINUXBR)
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	err = plugin.ovs.AddBridge("fail-mode=secure", "protocols=OpenFlow13")
	if err != nil {
		return false, err
	}
	_ = plugin.ovs.DeletePort(VXLAN)
	_, err = plugin.ovs.AddPort(VXLAN, 1, "type=vxlan", `options:remote_ip="flow"`, `options:key="flow"`)
	if err != nil {
		return false, err
	}
	_ = plugin.ovs.DeletePort(TUN)
	_, err = plugin.ovs.AddPort(TUN, 2, "type=internal")
	if err != nil {
		return false, err
	}
	_, err = plugin.ovs.AddPort(VOVSBR, 3)
	if err != nil {
		return false, err
	}

	otx := plugin.ovs.NewTransaction()
	// Table 0: initial dispatch based on in_port
	// vxlan0
	otx.AddFlow("table=0, priority=200, in_port=1, arp, nw_src=%s, nw_dst=%s, actions=move:NXM_NX_TUN_ID[0..31]->NXM_NX_REG0[],goto_table:1", clusterNetworkCIDR, localSubnetCIDR)
	otx.AddFlow("table=0, priority=200, in_port=1, ip, nw_src=%s, nw_dst=%s, actions=move:NXM_NX_TUN_ID[0..31]->NXM_NX_REG0[],goto_table:1", clusterNetworkCIDR, localSubnetCIDR)
	otx.AddFlow("table=0, priority=150, in_port=1, actions=drop")
	// tun0
	otx.AddFlow("table=0, priority=200, in_port=2, arp, nw_src=%s, nw_dst=%s, actions=goto_table:5", localSubnetGateway, clusterNetworkCIDR)
	otx.AddFlow("table=0, priority=200, in_port=2, ip, actions=goto_table:5")
	otx.AddFlow("table=0, priority=150, in_port=2, actions=drop")
	// vovsbr
	otx.AddFlow("table=0, priority=200, in_port=3, arp, nw_src=%s, actions=goto_table:5", localSubnetCIDR)
	otx.AddFlow("table=0, priority=200, in_port=3, ip, nw_src=%s, actions=goto_table:5", localSubnetCIDR)
	otx.AddFlow("table=0, priority=150, in_port=3, actions=drop")
	// else, from a container
	otx.AddFlow("table=0, priority=100, arp, actions=goto_table:2")
	otx.AddFlow("table=0, priority=100, ip, actions=goto_table:2")
	otx.AddFlow("table=0, priority=0, actions=drop")

	// Table 1: VXLAN ingress filtering; filled in by AddHostSubnetRules()
	// eg, "table=1, priority=100, tun_src=${remote_node_ip}, actions=goto_table:5"
	otx.AddFlow("table=1, priority=0, actions=drop")

	// Table 2: from OpenShift container; validate IP/MAC, assign tenant-id; filled in by openshift-sdn-ovs
	// eg, "table=2, priority=100, in_port=${ovs_port}, arp, nw_src=${ipaddr}, arp_sha=${macaddr}, actions=load:${tenant_id}->NXM_NX_REG0[], goto_table:5"
	//     "table=2, priority=100, in_port=${ovs_port}, ip, nw_src=${ipaddr}, actions=load:${tenant_id}->NXM_NX_REG0[], goto_table:3"
	// (${tenant_id} is always 0 for single-tenant)
	otx.AddFlow("table=2, priority=0, actions=drop")

	// Table 3: from OpenShift container; service vs non-service
	otx.AddFlow("table=3, priority=100, ip, nw_dst=%s, actions=goto_table:4", serviceNetworkCIDR)
	otx.AddFlow("table=3, priority=0, actions=goto_table:5")

	// Table 4: from OpenShift container; service dispatch; filled in by AddServiceRules()
	otx.AddFlow("table=4, priority=200, reg0=0, actions=output:2")
	// eg, "table=4, priority=100, reg0=${tenant_id}, ${service_proto}, nw_dst=${service_ip}, tp_dst=${service_port}, actions=output:2"
	otx.AddFlow("table=4, priority=0, actions=drop")

	// Table 5: general routing
	otx.AddFlow("table=5, priority=300, arp, nw_dst=%s, actions=output:2", localSubnetGateway)
	otx.AddFlow("table=5, priority=300, ip, nw_dst=%s, actions=output:2", localSubnetGateway)
	otx.AddFlow("table=5, priority=200, arp, nw_dst=%s, actions=goto_table:6", localSubnetCIDR)
	otx.AddFlow("table=5, priority=200, ip, nw_dst=%s, actions=goto_table:7", localSubnetCIDR)
	otx.AddFlow("table=5, priority=100, arp, nw_dst=%s, actions=goto_table:8", clusterNetworkCIDR)
	otx.AddFlow("table=5, priority=100, ip, nw_dst=%s, actions=goto_table:8", clusterNetworkCIDR)
	otx.AddFlow("table=5, priority=0, ip, actions=goto_table:9")
	otx.AddFlow("table=5, priority=0, arp, actions=drop")

	// Table 6: ARP to container, filled in by openshift-sdn-ovs
	// eg, "table=6, priority=100, arp, nw_dst=${container_ip}, actions=output:${ovs_port}"
	otx.AddFlow("table=6, priority=0, actions=output:3")

	// Table 7: IP to container; filled in by openshift-sdn-ovs
	// eg, "table=7, priority=100, reg0=0, ip, nw_dst=${ipaddr}, actions=output:${ovs_port}"
	// eg, "table=7, priority=100, reg0=${tenant_id}, ip, nw_dst=${ipaddr}, actions=output:${ovs_port}"
	otx.AddFlow("table=7, priority=0, actions=output:3")

	// Table 8: to remote container; filled in by AddHostSubnetRules()
	// eg, "table=8, priority=100, arp, nw_dst=${remote_subnet_cidr}, actions=move:NXM_NX_REG0[]->NXM_NX_TUN_ID[0..31], set_field:${remote_node_ip}->tun_dst,output:1"
	// eg, "table=8, priority=100, ip, nw_dst=${remote_subnet_cidr}, actions=move:NXM_NX_REG0[]->NXM_NX_TUN_ID[0..31], set_field:${remote_node_ip}->tun_dst,output:1"
	otx.AddFlow("table=8, priority=0, actions=drop")

	// Table 9: egress network policy dispatch; edited by updateEgressNetworkPolicyRules()
	// eg, "table=9, reg0=${tenant_id}, priority=2, ip, nw_dst=${external_cidr}, actions=drop
	otx.AddFlow("table=9, priority=0, actions=output:2")

	err = otx.EndTransaction()
	if err != nil {
		return false, err
	}

	itx = ipcmd.NewTransaction(exec, TUN)
	itx.AddAddress(gwCIDR)
	defer deleteLocalSubnetRoute(TUN, localSubnetCIDR)
	itx.SetLink("mtu", mtuStr)
	itx.SetLink("up")
	itx.AddRoute(clusterNetworkCIDR, "proto", "kernel", "scope", "link")
	itx.AddRoute(serviceNetworkCIDR)
	err = itx.EndTransaction()
	if err != nil {
		return false, err
	}

	// Clean up docker0 since docker won't
	itx = ipcmd.NewTransaction(exec, "docker0")
	itx.SetLink("down")
	itx.IgnoreError()
	itx.DeleteLink()
	itx.IgnoreError()
	_ = itx.EndTransaction()

	sysctl := sysctl.New()

	// Disable iptables for linux bridges (and in particular lbr0), ignoring errors.
	// (This has to have been performed in advance for docker-in-docker deployments,
	// since this will fail there).
	_, _ = exec.Command("modprobe", "br_netfilter").CombinedOutput()
	err = sysctl.SetSysctl("net/bridge/bridge-nf-call-iptables", 0)
	if err != nil {
		glog.Warningf("Could not set net.bridge.bridge-nf-call-iptables sysctl: %s", err)
	} else {
		glog.V(5).Infof("[SDN setup] set net.bridge.bridge-nf-call-iptables to 0")
	}

	// Enable IP forwarding for ipv4 packets
	err = sysctl.SetSysctl("net/ipv4/ip_forward", 1)
	if err != nil {
		return false, fmt.Errorf("Could not enable IPv4 forwarding: %s", err)
	}
	err = sysctl.SetSysctl(fmt.Sprintf("net/ipv4/conf/%s/forwarding", TUN), 1)
	if err != nil {
		return false, fmt.Errorf("Could not enable IPv4 forwarding on %s: %s", TUN, err)
	}

	// Table 253: rule version; note action is hex bytes separated by '.'
	otx = plugin.ovs.NewTransaction()
	pluginVersion := getPluginVersion(plugin.multitenant)
	otx.AddFlow("%s, %s%s.%s", VERSION_TABLE, VERSION_ACTION, pluginVersion[0], pluginVersion[1])
	err = otx.EndTransaction()
	if err != nil {
		return false, err
	}

	return true, nil
}