Beispiel #1
0
// main is the entry point shared by every platform hook. The hook being run is identified by the base name of
// the executable (os.Args[0]):
//
//   - "southbound-update": logged and otherwise ignored.
//   - "heartbeat": logged and otherwise ignored.
//   - "peer-status" / "peer-update": the peer list received from the platform is turned into the desired ONOS
//     cluster membership, handed to onos.WriteClusterConfig, and the local ONOS wrapper is asked to reset ONOS.
//     A "peer-join" command carrying this instance's IP is always written to stdout afterwards.
//
// Malformed or missing peer data is logged and skipped rather than causing a panic.
func main() {

	flag.Parse()

	// Open a log file for appending if they have not requested to log to the console
	if !*logToConsole {
		logfile, err := os.OpenFile(logFileName, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0644)
		if err != nil {
			log.Fatalf("ERROR: Unable to open log file, exiting: %s\n", err)
		}
		defer logfile.Close()
		log.SetOutput(logfile)
	}

	// Lists may be either space or comma separated
	isSeparator := func(r rune) bool { return unicode.IsSpace(r) || r == ',' }

	// Check to see if the data suffix list is overridden by an environment variable and then convert the given
	// list, or the default, to a string array for checking against later down the code
	suffixList := os.Getenv(suffixListOverride)
	if suffixList == "" {
		suffixList = defaultSuffixList
		log.Printf("INFO: using default data suffix list '%s'\n", suffixList)
	} else {
		log.Printf("INFO: overriding data suffix list to '%s'\n", suffixList)
	}
	suffixes := strings.FieldsFunc(suffixList, isSeparator)

	// Check to see if the data prefix list is overridden by an environment variable and then convert the given
	// list, or the default, to a string array for checking against later down the code
	prefixList := os.Getenv(prefixListOverride)
	if prefixList == "" {
		prefixList = defaultPrefixList
		log.Printf("INFO: using default data prefix list '%s'\n", prefixList)
	} else {
		log.Printf("INFO: overriding data prefix list to '%s'\n", prefixList)
	}
	prefixes := strings.FieldsFunc(prefixList, isSeparator)

	bpData := Gather(&Config{
		Verbose:           true,
		DataSuffixList:    suffixes,
		IncludePrefixList: prefixes,
		ExcludeList:       blackList,
	})

	// OK, we have gathered the appropriate data from the environment, what we do with it depends on the hook that
	// was called. We will use the name of the executable to distinguish which hook was called.
	hook := path.Base(os.Args[0])
	switch hook {
	case "southbound-update":
		// Ignore, return 0
		log.Println("INFO: received SOUTHBOUND-UPDATE")
	case "heartbeat":
		// Nothing to do for a heartbeat beyond recording that it arrived
		log.Println("INFO: received HEARTBEAT")
		return
	case "peer-status", "peer-update":
		log.Printf("INFO: received %s\n", strings.ToUpper(hook))

		// Use checked type assertions throughout: the shape of the gathered data is controlled by the platform
		// and a missing or mistyped field must not panic the hook. Indexing a nil map is safe and yields nil.
		root, _ := bpData.(map[string]interface{})
		peerData, ok := root[peerDataKey].(map[string]interface{})
		if !ok {
			log.Printf("INFO: no peer information received from platform, no action taken")
		} else {
			// Log the peer data exactly as received to aid debugging
			if b, err := json.MarshalIndent(peerData, "    ", "    "); err == nil {
				log.Printf("INFO: received peer data from the platform:\n    %s\n", string(b))
			} else {
				log.Printf("ERROR: unable to decode peer data from platform, curious: %s\n", err)
			}

			// If the data set does not contain any peers (or the peer list is absent / malformed) then we will
			// skip this data set
			peers, _ := peerData["peers"].(map[string]interface{})
			if len(peers) == 0 {
				log.Printf("INFO: empty peering list from platform, no update action taken")
			} else {
				// The leader ID is informational only, so its absence does not prevent the update
				if leader, ok := peerData["leader"].(float64); ok {
					log.Printf("INFO: peer leader ID is: %s\n", strconv.Itoa(int(leader)))
				} else {
					log.Println("WARN: peer data from platform does not contain a numeric leader ID")
				}

				// Build the desired cluster membership: ourselves first, then every other peer
				var want []string
				myIP, err := onos.GetMyIP()
				if err == nil {
					want = append(want, myIP)
					log.Printf("INFO: append own IP '%s' to desired cluster list\n", myIP)
				} else {
					log.Println("WARN: unable to determine own ID, unable to add it to desired cluster list")
				}
				for id, peer := range peers {
					entry, _ := peer.(map[string]interface{})
					ip, ok := entry["ip"].(string)
					if !ok {
						log.Printf("WARN: peer '%s' has no usable IP address, skipping\n", id)
						continue
					}

					// If the IP of the peer is not "me" then add it to the list. We always add ourselves and we
					// don't want it added twice
					if myIP != ip {
						log.Printf("INFO: append peer with IP '%s' to desired cluster list\n", ip)
						want = append(want, ip)
					}
				}

				onos.WriteClusterConfig(want)

				// Now that the new ("next") cluster configuration has been handed off, kick the ONOS wrapper so
				// it will do a HARD restart of ONOS, because ONOS needs a HARD reset in order to come up
				// properly, silly ONOS
				client := &http.Client{}
				log.Println("INFO: kicking ONOS to the curb")
				req, err := http.NewRequest("GET", "http://127.0.0.1:4343/reset", nil)
				if err != nil {
					log.Printf("ERROR: unable to create ONOS restart request: %s\n", err)
				} else if resp, err := client.Do(req); err != nil {
					log.Printf("ERROR: unable to restart ONOS: %s\n", err)
				} else {
					// The response content is not needed, but the body must be closed to release the connection
					resp.Body.Close()
				}
			}
		}

		// Return join message always, not the best form, but lets face it the platform should know
		myIP, err := onos.GetMyIP()
		if err != nil {
			myIP = "0.0.0.0"
		}
		b, err := json.Marshal(map[string]interface{}{
			"command": "peer-join",
			"data": map[string]interface{}{
				"ip": myIP,
			},
		})

		// This is a failsafe. I suppose we could just skip the previous and only do the failsafe, but the above is
		// cleaner
		if err != nil {
			fmt.Println("{\"command\":\"peer-join\",\"data\":{}}")
		} else {
			fmt.Println(string(b))
		}
	}
}
Beispiel #2
0
// watchPods watches updates from kubernetes and updates the cluster information (via onos.WriteClusterConfig)
// when membership changes.
//
// kube is the host (and optional port) of the kubernetes API server. The function first fetches the current set
// of pods matching kubeOnosSelector to seed the cluster set, then opens a watch on the same selector and applies
// ADDED / MODIFIED / DELETED events as they arrive. When the watch stream ends or cannot be decoded, the
// connection is closed and the watch is re-established.
//
// This function does not return. It terminates the process via log.Fatalf if the initial query fails or if more
// than maxErrorCount consecutive attempts to open the watch fail.
func watchPods(kube string) {

	cluster := onos.StringSet{}
	// The set in the cluster will always include myself.
	ip, err := onos.GetMyIP()
	if err != nil {
		// add loopback, may not be the best solution
		cluster.Add("127.0.0.1")
	} else {
		cluster.Add(ip)
	}

	// We are going to use a SSL transport with verification turned off
	timeout, err := time.ParseDuration(httpTimeout)
	if err != nil {
		log.Printf("ERROR: unable to parse default HTTP timeout of '%s', will default to no timeout\n", httpTimeout)
	}
	tr := &http.Transport{
		TLSClientConfig:       &tls.Config{InsecureSkipVerify: true},
		TLSHandshakeTimeout:   timeout,
		ResponseHeaderTimeout: timeout,
	}
	client := &http.Client{
		Transport: tr,
		Timeout:   timeout,
	}

	// Both the initial query and the watch use the same pod selector URL
	podsURL := "https://" + kubeCreds + "@" + kube + "/api/v1/namespaces/default/pods?labelSelector=" +
		url.QueryEscape(kubeOnosSelector)

	// NOTE(review): this writes kubeCreds to the log in clear text - confirm that is acceptable
	log.Printf("INFO: fetch cluster information from 'https://%s@%s/api/v1/namespaces/default/pods?labelSelector=%s'\n",
		kubeCreds, kube, url.QueryEscape(kubeOnosSelector))

	resp, err := client.Get(podsURL)
	if err != nil {
		log.Fatalf("ERROR: Unable to communciate to kubernetes to maintain cluster information: %s\n", err)
	}

	// Close the body explicitly rather than with defer: this function never returns, so a deferred close would
	// never run.
	var data map[string]interface{}
	err = json.NewDecoder(resp.Body).Decode(&data)
	resp.Body.Close()
	if err != nil {
		log.Fatalf("ERROR: Unable to parse response back from kubernetes: %s\n", err)
	}

	// Populate the cluster set with the base from the query
	jq := jsonq.NewQuery(data)
	items, err := jq.Array("items")
	if err != nil {
		log.Printf("ERROR: Unexpected response from kubernetes: %s\n", err)
	} else {
		modified := false
		for _, item := range items {
			jq = jsonq.NewQuery(item)
			ip, err = jq.String("status.podIP")
			if err == nil {
				if !cluster.Contains(ip) {
					cluster.Add(ip)
					modified = true
				}
			}
		}
		if modified {
			onos.WriteClusterConfig(cluster.Array())
		} else {
			log.Println("INFO: no modification of cluster information based on update from kubernetes")
		}
	}

	log.Printf("INFO: base set of cluster members is %v\n", cluster.Array())

	// Marshal error deliberately ignored: the value was just decoded from JSON and is only used for debug output
	b, _ := json.MarshalIndent(data, "", "    ")
	log.Printf("DEBUG: %s\n", string(b))

	errCount := 0
	// The watch is a long lived request, so the overall client timeout must not apply to it
	client.Timeout = 0
	for {
		resp, err = client.Get(podsURL + "&watch=true")
		if err != nil {
			errCount++
			if errCount > maxErrorCount {
				log.Fatalf("ERROR: Too many errors (%d) while attempting to communicate with kubernetes: %s", errCount, err)
			}
			continue
		}

		// Worked, reset error count
		errCount = 0

		decoder := json.NewDecoder(resp.Body)
		for {
			var event map[string]interface{}
			if err := decoder.Decode(&event); err != nil {
				// Once the stream has ended or is corrupt every further Decode call fails as well, so stop
				// reading from this response and re-establish the watch instead of spinning on the error.
				log.Printf("ERROR: unable to decode %s\n", err)
				break
			}

			// Marshal error deliberately ignored: debug output only
			b, _ := json.MarshalIndent(event, "", "    ")
			log.Printf("DEBUG: retrieved: %v\n", string(b))

			jq := jsonq.NewQuery(event)
			podIP, err := jq.String("object.status.podIP")
			if err != nil {
				log.Printf("ERROR COULD NOT FIND IP: %s\n", err)
				continue
			}

			log.Printf("IP: (%s) %s == %s\n", jq.AsString("type"), jq.AsString("object.metadata.name"),
				jq.AsString("object.status.podIP"))

			modified := false
			switch jq.AsString("type") {
			case "DELETED":
				if cluster.Contains(podIP) {
					cluster.Remove(podIP)
					modified = true
				}
			case "MODIFIED", "ADDED":
				if !cluster.Contains(podIP) {
					cluster.Add(podIP)
					modified = true
				}
			}
			if modified {
				onos.WriteClusterConfig(cluster.Array())
			} else {
				log.Println("INFO: no modification of cluster information based on update from kubernetes")
			}
		}

		// Release the connection held by the finished watch before opening a new one
		resp.Body.Close()
	}
}