func main() {
	flag.Parse()

	// Open a log file for appending if they have not requested to log to the console
	var logfile *os.File
	if !*logToConsole {
		var err error
		logfile, err = os.OpenFile(logFileName, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0644)
		if err != nil {
			log.Fatalf("ERROR: Unable to open log file, exiting: %s\n", err)
		}
		defer logfile.Close()
		log.SetOutput(logfile)
	}

	// Check to see if the data suffix list is overridden by an environment variable, then convert the given
	// list (or the default) to a string array for checking against later in the code
	suffixList := os.Getenv(suffixListOverride)
	if suffixList == "" {
		suffixList = defaultSuffixList
	}
	log.Printf("INFO: overriding data suffix list to '%s'\n", suffixList)

	// Support both space and comma separated lists
	suffixes := strings.FieldsFunc(suffixList, func(r rune) bool {
		return unicode.IsSpace(r) || r == ','
	})

	// Check to see if the data prefix list is overridden by an environment variable, then convert the given
	// list (or the default) to a string array for checking against later in the code
	prefixList := os.Getenv(prefixListOverride)
	if prefixList == "" {
		prefixList = defaultPrefixList
	}
	log.Printf("INFO: overriding data prefix list to '%s'\n", prefixList)

	// Support both space and comma separated lists
	prefixes := strings.FieldsFunc(prefixList, func(r rune) bool {
		return unicode.IsSpace(r) || r == ','
	})

	bpData := Gather(&Config{
		Verbose:           true,
		DataSuffixList:    suffixes,
		IncludePrefixList: prefixes,
		ExcludeList:       blackList,
	})

	// OK, we have gathered the appropriate data from the environment; what we do with it depends on the hook
	// that was called. We will use the name of the executable to distinguish which hook was called.
	switch path.Base(os.Args[0]) {
	case "southbound-update":
		// Ignore, return 0
		log.Println("INFO: received SOUTHBOUND-UPDATE")
	case "heartbeat":
		// Open the cluster file, and if it is not empty then send what is there to ONOS as a cluster Config.
		// If the request to ONOS fails, don't fail this process, as ONOS is likely in some initialization
		// state and takes far too long to get to a point where it will respond.
		log.Println("INFO: received HEARTBEAT")
		return
	case "peer-status":
		fallthrough
	case "peer-update":
		log.Printf("INFO: received %s\n", strings.ToUpper(path.Base(os.Args[0])))
		if peerData, ok := bpData.(map[string]interface{})[peerDataKey].(map[string]interface{}); !ok {
			log.Printf("INFO: no peer information received from platform, no action taken")
		} else {
			var want []string

			// We want to verify the new peering information, if it is included in the message, against the
			// existing cluster information in the ONOS configuration. If it has changed we will need to
			// update ONOS.
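			// For reference, the peer data consumed below is assumed to look roughly like this
			// (hypothetical example; the field names come from the accesses below, the values are
			// invented):
			//
			//	{
			//	    "leader": 1,
			//	    "peers": {
			//	        "1": { "ip": "10.1.1.1" },
			//	        "2": { "ip": "10.1.1.2" }
			//	    }
			//	}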
			if b, err := json.MarshalIndent(peerData, " ", " "); err == nil {
				log.Printf("INFO: received peer data from the platform:\n %s\n", string(b))
			} else {
				log.Printf("ERROR: unable to encode peer data from platform, curious: %s\n", err)
			}
			peers := peerData["peers"].(map[string]interface{})

			// If the data set does not contain any peers then we will skip this data set
			if len(peers) == 0 {
				log.Printf("INFO: empty peering list from platform, no update action taken")
			} else {
				peerLeader := strconv.Itoa(int(peerData["leader"].(float64)))
				log.Printf("INFO: peer leader ID is: %s\n", peerLeader)
				myIP, err := onos.GetMyIP()
				if err == nil {
					want = append(want, myIP)
					log.Printf("INFO: append own IP '%s' to desired cluster list\n", myIP)
				} else {
					log.Println("WARN: unable to determine own IP, unable to add it to desired cluster list")
				}
				for _, peer := range peers {
					// If the IP of the peer is not "me" then add it to the list. We always add ourselves
					// and we don't want it added twice.
					ip := peer.(map[string]interface{})["ip"].(string)
					if myIP != ip {
						log.Printf("INFO: append peer with IP '%s' to desired cluster list\n", ip)
						want = append(want, ip)
					}
				}
				onos.WriteClusterConfig(want)
				/*
					// Construct the object that represents the ONOS cluster information
					cluster := make(map[string]interface{})
					var nodes []interface{}
					for _, ip := range want {
						node := map[string]interface{}{
							"id":      ip,
							"ip":      ip,
							"tcpPort": 9876,
						}
						nodes = append(nodes, node)
					}
					cluster["nodes"] = nodes
					leader := peerData["peers"].(map[string]interface{})[peerLeader].(map[string]interface{})

					// Calculate the prefix by stripping off the last octet and replacing it with a wildcard
					ipPrefix := leader["ip"].(string)
					idx := strings.LastIndex(ipPrefix, ".")
					ipPrefix = ipPrefix[:idx] + ".*"
					cluster["ipPrefix"] = ipPrefix

					// Construct the object that represents the ONOS partition information. This is done by
					// creating the same number of partitions as there are ONOS instances in the cluster and
					// then putting N - 1 instances in each partition.
					//
					// We sort the list of nodes in the cluster so that each instance will calculate the same
					// partition table.
					//
					// IT IS IMPORTANT THAT EACH INSTANCE HAVE IDENTICAL PARTITION CONFIGURATIONS
					partitions := make(map[string]interface{})
					cnt := len(want)
					sort.Sort(onos.IPOrder(want))
					size := cnt - 1
					if size < 3 {
						size = cnt
					}
					for i := 0; i < cnt; i++ {
						part := make([]map[string]interface{}, size)
						for j := 0; j < size; j++ {
							ip := want[(i+j)%cnt]
							part[j] = map[string]interface{}{
								"id":      ip,
								"ip":      ip,
								"tcpPort": 9876,
							}
						}
						name := fmt.Sprintf("p%d", i+1)
						partitions[name] = part
					}
					tablets := map[string]interface{}{
						"nodes":      nodes,
						"partitions": partitions,
					}

					// Write the partition table to a known location where it will be picked up by the ONOS
					// "wrapper" and pushed to ONOS when it is restarted (yes, we marshal the data twice, once
					// compact and once with indentation; not efficient, but I want a pretty log file)
					if data, err := json.Marshal(tablets); err != nil {
						log.Printf("ERROR: Unable to encode tablets information to write to update file, no file written: %s\n", err)
					} else {
						if b, err := json.MarshalIndent(tablets, " ", " "); err == nil {
							log.Printf("INFO: writing ONOS tablets information to cluster file '%s'\n %s\n", tabletsFileName, string(b))
						}

						// Open / create the file with an exclusive lock (only one process can handle this at a time)
						if fTablets, err := os.OpenFile(tabletsFileName, os.O_RDWR|os.O_CREATE, 0644); err == nil {
							defer fTablets.Close()
							if err := syscall.Flock(int(fTablets.Fd()), syscall.LOCK_EX); err == nil {
								defer syscall.Flock(int(fTablets.Fd()), syscall.LOCK_UN)
								if _, err := fTablets.Write(data); err != nil {
									log.Printf("ERROR: error writing tablets information to file '%s': %s\n", tabletsFileName, err)
								}
							} else {
								log.Printf("ERROR: unable to acquire lock on tablets file '%s': %s\n", tabletsFileName, err)
							}
						} else {
							log.Printf("ERROR: unable to open tablets file '%s': %s\n", tabletsFileName, err)
						}
					}

					// Write the cluster info to a known location where it will be picked up by the ONOS
					// "wrapper" and pushed to ONOS when it is restarted (yes, we marshal the data twice, once
					// compact and once with indentation; not efficient, but I want a pretty log file)
					if data, err := json.Marshal(cluster); err != nil {
						log.Printf("ERROR: Unable to encode cluster information to write to update file, no file written: %s\n", err)
					} else {
						if b, err := json.MarshalIndent(cluster, " ", " "); err == nil {
							log.Printf("INFO: writing ONOS cluster information to cluster file '%s'\n %s\n", clusterFileName, string(b))
						}

						// Open / create the file with an exclusive lock (only one process can handle this at a time)
						if fCluster, err := os.OpenFile(clusterFileName, os.O_RDWR|os.O_CREATE, 0644); err == nil {
							defer fCluster.Close()
							if err := syscall.Flock(int(fCluster.Fd()), syscall.LOCK_EX); err == nil {
								defer syscall.Flock(int(fCluster.Fd()), syscall.LOCK_UN)
								if _, err := fCluster.Write(data); err != nil {
									log.Printf("ERROR: error writing cluster information to file '%s': %s\n", clusterFileName, err)
								}
							} else {
								log.Printf("ERROR: unable to acquire lock on cluster file '%s': %s\n", clusterFileName, err)
							}
						} else {
							log.Printf("ERROR: unable to open cluster file '%s': %s\n", clusterFileName, err)
						}
					}
				*/

				// Now that we have written the new ("next") cluster configuration files to a known location,
				// kick the ONOS wrapper so it will do a HARD restart of ONOS, because ONOS needs a HARD reset
				// in order to come up properly, silly ONOS
				client := &http.Client{}
				log.Println("INFO: kicking ONOS to the curb")
				if req, err := http.NewRequest("GET", "http://127.0.0.1:4343/reset", nil); err == nil {
					if _, err := client.Do(req); err != nil {
						log.Printf("ERROR: unable to restart ONOS: %s\n", err)
					}
				}
			}
		}

		// Always return a join message; not the best form, but let's face it, the platform should know
		myIP, err := onos.GetMyIP()
		if err != nil {
			myIP = "0.0.0.0"
		}
		b, err := json.Marshal(map[string]interface{}{
			"command": "peer-join",
			"data": map[string]interface{}{
				"ip": myIP,
			},
		})

		// This is a failsafe. I suppose we could just skip the previous and only do the failsafe, but the
		// above is cleaner
		if err != nil {
			fmt.Println("{\"command\":\"peer-join\",\"data\":{}}")
		} else {
			fmt.Println(string(b))
		}
	}
}
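// The inline file writing in the commented-out block above has been folded into
// onos.WriteClusterConfig. As a minimal sketch of what such a helper can look like, assuming the
// clusterFileName target, the node layout, and the flock-based exclusive write from that block;
// this is an illustrative assumption, not necessarily the actual implementation:
/*
	func WriteClusterConfig(ips []string) {
		// Build the ONOS node list from the desired member IPs
		var nodes []map[string]interface{}
		for _, ip := range ips {
			nodes = append(nodes, map[string]interface{}{"id": ip, "ip": ip, "tcpPort": 9876})
		}
		data, err := json.Marshal(map[string]interface{}{"nodes": nodes})
		if err != nil {
			log.Printf("ERROR: unable to encode cluster information: %s\n", err)
			return
		}
		f, err := os.OpenFile(clusterFileName, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
		if err != nil {
			log.Printf("ERROR: unable to open cluster file '%s': %s\n", clusterFileName, err)
			return
		}
		defer f.Close()
		// Exclusive lock so concurrent hook invocations do not interleave writes
		if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil {
			log.Printf("ERROR: unable to acquire lock on cluster file '%s': %s\n", clusterFileName, err)
			return
		}
		defer syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
		if _, err := f.Write(data); err != nil {
			log.Printf("ERROR: error writing cluster information to file '%s': %s\n", clusterFileName, err)
		}
	}
*/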
// watchPods watches updates from kubernetes and updates the cluster information (and kicks ONOS) when membership
// changes.
func watchPods(kube string) {
	cluster := onos.StringSet{}

	// The set in the cluster will always include myself.
	ip, err := onos.GetMyIP()
	if err != nil {
		// Add loopback; this may not be the best solution
		cluster.Add("127.0.0.1")
	} else {
		cluster.Add(ip)
	}

	// We are going to use an SSL transport with verification turned off
	timeout, err := time.ParseDuration(httpTimeout)
	if err != nil {
		log.Printf("ERROR: unable to parse default HTTP timeout of '%s', will default to no timeout\n", httpTimeout)
	}
	tr := &http.Transport{
		TLSClientConfig:       &tls.Config{InsecureSkipVerify: true},
		TLSHandshakeTimeout:   timeout,
		ResponseHeaderTimeout: timeout,
	}
	client := &http.Client{
		Transport: tr,
		Timeout:   timeout,
	}

	log.Printf("INFO: fetch cluster information from 'https://%s@%s/api/v1/namespaces/default/pods?labelSelector=%s'\n",
		kubeCreds, kube, url.QueryEscape(kubeOnosSelector))
	resp, err := client.Get("https://" + kubeCreds + "@" + kube + "/api/v1/namespaces/default/pods?labelSelector=" +
		url.QueryEscape(kubeOnosSelector))
	if err != nil {
		log.Fatalf("ERROR: Unable to communicate with kubernetes to maintain cluster information: %s\n", err)
	}
	var data map[string]interface{}
	err = json.NewDecoder(resp.Body).Decode(&data)
	if err != nil {
		log.Fatalf("ERROR: Unable to parse response back from kubernetes: %s\n", err)
	}

	// Populate the cluster set with the base from the query
	jq := jsonq.NewQuery(data)
	items, err := jq.Array("items")
	if err != nil {
		log.Printf("ERROR: Unexpected response from kubernetes: %s\n", err)
	} else {
		modified := false
		for _, item := range items {
			jq = jsonq.NewQuery(item)
			ip, err = jq.String("status.podIP")
			if err == nil {
				if !cluster.Contains(ip) {
					cluster.Add(ip)
					modified = true
				}
			}
		}
		if modified {
			onos.WriteClusterConfig(cluster.Array())
		} else {
			log.Println("INFO: no modification of cluster information based on update from kubernetes")
		}
	}
	log.Printf("INFO: base set of cluster members is %v\n", cluster.Array())
	b, _ := json.MarshalIndent(data, "", " ")
	log.Printf("DEBUG: %s\n", string(b))

	// The watch request is long lived, so disable the client timeout before entering the watch loop
	errCount := 0
	client.Timeout = 0
	for {
		resp, err = client.Get("https://" + kubeCreds + "@" + kube + "/api/v1/namespaces/default/pods?labelSelector=" +
			url.QueryEscape(kubeOnosSelector) + "&watch=true")
		if err != nil {
			errCount++
			if errCount > maxErrorCount {
				log.Fatalf("ERROR: Too many errors (%d) while attempting to communicate with kubernetes: %s", errCount, err)
			}
		} else {
			// Worked, reset the error count
			errCount = 0
			decoder := json.NewDecoder(resp.Body)
			for {
				var data map[string]interface{}
				err := decoder.Decode(&data)
				if err == nil {
					b, _ := json.MarshalIndent(data, "", " ")
					log.Printf("DEBUG: retrieved: %v\n", string(b))
					jq := jsonq.NewQuery(data)
					ip, err = jq.String("object.status.podIP")
					if err == nil {
						modified := false
						log.Printf("IP: (%s) %s == %s\n", jq.AsString("type"), jq.AsString("object.metadata.name"),
							jq.AsString("object.status.podIP"))
						switch jq.AsString("type") {
						case "DELETED":
							if cluster.Contains(ip) {
								cluster.Remove(ip)
								modified = true
							}
						case "MODIFIED":
							fallthrough
						case "ADDED":
							if !cluster.Contains(ip) {
								cluster.Add(ip)
								modified = true
							}
						}
						if modified {
							onos.WriteClusterConfig(cluster.Array())
						} else {
							log.Println("INFO: no modification of cluster information based on update from kubernetes")
						}
					} else {
						log.Printf("ERROR: could not find IP in update: %s\n", err)
					}
				} else {
					// The watch stream has likely been closed; break out and re-establish the watch
					log.Printf("ERROR: unable to decode update from kubernetes: %s\n", err)
					break
				}
			}
		}
	}
}
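// watchPods relies on a small set type from the "onos" package. A minimal sketch of the assumed
// StringSet (Add, Remove, Contains, Array) built on a map; this is an illustrative assumption,
// not necessarily the actual implementation. Array returns members in sorted order so that every
// instance writes an identical cluster configuration:
/*
	type StringSet map[string]struct{}

	func (s StringSet) Add(v string)    { s[v] = struct{}{} }
	func (s StringSet) Remove(v string) { delete(s, v) }

	func (s StringSet) Contains(v string) bool {
		_, ok := s[v]
		return ok
	}

	func (s StringSet) Array() []string {
		out := make([]string, 0, len(s))
		for v := range s {
			out = append(out, v)
		}
		sort.Strings(out)
		return out
	}
*/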