func main() {
	flag.Parse()

	// Open a log file for appending if logging to the console was not requested
	var logfile *os.File
	if !*logToConsole {
		var err error
		logfile, err = os.OpenFile(logFileName, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0644)
		if err != nil {
			log.Fatalf("ERROR: Unable to open log file, exiting: %s\n", err)
		}
		defer logfile.Close()
		log.SetOutput(logfile)
	}

	// Check to see if the data suffix list is overridden by an environment variable and then convert the
	// given list, or the default, to a string array for checking against later in the code
	suffixList := os.Getenv(suffixListOverride)
	if suffixList == "" {
		suffixList = defaultSuffixList
	}
	log.Printf("INFO: using data suffix list '%s'\n", suffixList)

	// Support both space and comma separated lists
	suffixes := strings.FieldsFunc(suffixList, func(r rune) bool {
		return unicode.IsSpace(r) || r == ','
	})

	// Check to see if the data prefix list is overridden by an environment variable and then convert the
	// given list, or the default, to a string array for checking against later in the code
	prefixList := os.Getenv(prefixListOverride)
	if prefixList == "" {
		prefixList = defaultPrefixList
	}
	log.Printf("INFO: using data prefix list '%s'\n", prefixList)

	// Support both space and comma separated lists
	prefixes := strings.FieldsFunc(prefixList, func(r rune) bool {
		return unicode.IsSpace(r) || r == ','
	})

	bpData := Gather(&Config{
		Verbose:           true,
		DataSuffixList:    suffixes,
		IncludePrefixList: prefixes,
		ExcludeList:       blackList,
	})

	// OK, we have gathered the appropriate data from the environment; what we do with it depends on the hook
	// that was called. We use the name of the executable to distinguish which hook that was.
	switch path.Base(os.Args[0]) {
	case "southbound-update":
		// Ignore, return 0
		log.Println("INFO: received SOUTHBOUND-UPDATE")
	case "heartbeat":
		// Open the cluster file, and if it is not empty then send what is there to ONOS as a cluster Config.
		// If the request to ONOS fails, don't fail this process; ONOS is likely just in some initialization
		// state and takes way too long to get to a point where it will respond
		log.Println("INFO: received HEARTBEAT")
		return
	case "peer-status":
		fallthrough
	case "peer-update":
		log.Printf("INFO: received %s\n", strings.ToUpper(path.Base(os.Args[0])))
		if peerData, ok := bpData.(map[string]interface{})[peerDataKey].(map[string]interface{}); !ok {
			log.Printf("INFO: no peer information received from platform, no action taken")
		} else {
			var want []string

			// We want to verify the new peering information, if it is included in the message, against the
			// existing cluster information in the ONOS configuration. If it has changed we will need to
			// update ONOS.
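			//
			// A hedged sketch, for illustration only, of the peerData shape the code below assumes; the
			// field names "peers", "leader", and "ip" come from the accesses below, the values are
			// hypothetical:
			//
			//	{
			//	  "leader": 1,
			//	  "peers": {
			//	    "1": { "ip": "10.0.0.1" },
			//	    "2": { "ip": "10.0.0.2" }
			//	  }
			//	}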
			if b, err := json.MarshalIndent(peerData, " ", " "); err == nil {
				log.Printf("INFO: received peer data from the platform:\n %s\n", string(b))
			} else {
				log.Printf("ERROR: unable to decode peer data from platform, curious: %s\n", err)
			}
			peers := peerData["peers"].(map[string]interface{})

			// If the data set does not contain any peers then we will skip this data set
			if len(peers) == 0 {
				log.Printf("INFO: empty peering list from platform, no update action taken")
			} else {
				peerLeader := strconv.Itoa(int(peerData["leader"].(float64)))
				log.Printf("INFO: peer leader ID is: %s\n", peerLeader)
				myIP, err := onosms.GetMyIP()
				if err == nil {
					want = append(want, myIP)
					log.Printf("INFO: append own IP '%s' to desired cluster list\n", myIP)
				} else {
					log.Println("WARN: unable to determine own IP, unable to add it to desired cluster list")
				}
				for _, peer := range peers {
					// If the IP of the peer is not "me" then add it to the list. We always add ourselves
					// and we don't want it added twice
					ip := peer.(map[string]interface{})["ip"].(string)
					if myIP != ip {
						log.Printf("INFO: append peer with IP '%s' to desired cluster list\n", ip)
						want = append(want, ip)
					}
				}
				onosms.WriteClusterConfig(want)
				/*
					// Construct object that represents the ONOS cluster information
					cluster := make(map[string]interface{})
					var nodes []interface{}
					for _, ip := range want {
						node := map[string]interface{}{
							"id":      ip,
							"ip":      ip,
							"tcpPort": 9876,
						}
						nodes = append(nodes, node)
					}
					cluster["nodes"] = nodes
					leader := peerData["peers"].(map[string]interface{})[peerLeader].(map[string]interface{})

					// Calculate the prefix by stripping off the last octet and replacing it with a wildcard
					ipPrefix := leader["ip"].(string)
					idx := strings.LastIndex(ipPrefix, ".")
					ipPrefix = ipPrefix[:idx] + ".*"
					cluster["ipPrefix"] = ipPrefix

					// Construct object that represents the ONOS partition information. This is created by
					// making the same number of partitions as there are ONOS instances in the cluster and
					// then putting N - 1 instances in each partition.
					//
					// We sort the list of nodes in the cluster so that each instance will calculate the
					// same partition table.
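					//
					// A worked example of the construction below, under an assumed three-node cluster
					// (10.0.0.1 through 10.0.0.3): cnt = 3, and size falls back to cnt because cnt-1 < 3,
					// so each partition is a rotation of the full sorted node list:
					//
					//	p1 = [10.0.0.1, 10.0.0.2, 10.0.0.3]
					//	p2 = [10.0.0.2, 10.0.0.3, 10.0.0.1]
					//	p3 = [10.0.0.3, 10.0.0.1, 10.0.0.2]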
					//
					// IT IS IMPORTANT THAT EACH INSTANCE HAVE IDENTICAL PARTITION CONFIGURATIONS
					partitions := make(map[string]interface{})
					cnt := len(want)
					sort.Sort(onosms.IPOrder(want))
					size := cnt - 1
					if size < 3 {
						size = cnt
					}
					for i := 0; i < cnt; i++ {
						part := make([]map[string]interface{}, size)
						for j := 0; j < size; j++ {
							ip := want[(i+j)%cnt]
							part[j] = map[string]interface{}{
								"id":      ip,
								"ip":      ip,
								"tcpPort": 9876,
							}
						}
						name := fmt.Sprintf("p%d", i+1)
						partitions[name] = part
					}
					tablets := map[string]interface{}{
						"nodes":      nodes,
						"partitions": partitions,
					}

					// Write the partition table to a known location where it will be picked up by the ONOS
					// "wrapper" and pushed to ONOS when it is restarted (yes, we marshal the data twice,
					// once compact and once with indentation; not efficient, but I want a pretty log file)
					if data, err := json.Marshal(tablets); err != nil {
						log.Printf("ERROR: Unable to encode tablets information to write to update file, no file written: %s\n", err)
					} else {
						if b, err := json.MarshalIndent(tablets, " ", " "); err == nil {
							log.Printf("INFO: writing ONOS tablets information to tablets file '%s'\n %s\n", tabletsFileName, string(b))
						}

						// Open / create the file with an exclusive lock (only one process can handle this
						// at a time)
						if fTablets, err := os.OpenFile(tabletsFileName, os.O_RDWR|os.O_CREATE, 0644); err == nil {
							defer fTablets.Close()
							if err := syscall.Flock(int(fTablets.Fd()), syscall.LOCK_EX); err == nil {
								defer syscall.Flock(int(fTablets.Fd()), syscall.LOCK_UN)
								if _, err := fTablets.Write(data); err != nil {
									log.Printf("ERROR: error writing tablets information to file '%s': %s\n", tabletsFileName, err)
								}
							} else {
								log.Printf("ERROR: unable to acquire lock on tablets file '%s': %s\n", tabletsFileName, err)
							}
						} else {
							log.Printf("ERROR: unable to open tablets file '%s': %s\n", tabletsFileName, err)
						}
					}

					// Write the cluster info to a known location where it will be picked up by the ONOS
					// "wrapper" and pushed to ONOS when it is restarted (yes, we marshal the data twice,
					// once compact and once with indentation; not efficient, but I want a pretty log file)
					if data, err := json.Marshal(cluster); err != nil {
						log.Printf("ERROR: Unable to encode cluster information to write to update file, no file written: %s\n", err)
					} else {
						if b, err := json.MarshalIndent(cluster, " ", " "); err == nil {
							log.Printf("INFO: writing ONOS cluster information to cluster file '%s'\n %s\n", clusterFileName, string(b))
						}

						// Open / create the file with an exclusive lock (only one process can handle this
						// at a time)
						if fCluster, err := os.OpenFile(clusterFileName, os.O_RDWR|os.O_CREATE, 0644); err == nil {
							defer fCluster.Close()
							if err := syscall.Flock(int(fCluster.Fd()), syscall.LOCK_EX); err == nil {
								defer syscall.Flock(int(fCluster.Fd()), syscall.LOCK_UN)
								if _, err := fCluster.Write(data); err != nil {
									log.Printf("ERROR: error writing cluster information to file '%s': %s\n", clusterFileName, err)
								}
							} else {
								log.Printf("ERROR: unable to acquire lock on cluster file '%s': %s\n", clusterFileName, err)
							}
						} else {
							log.Printf("ERROR: unable to open cluster file '%s': %s\n", clusterFileName, err)
						}
					}
				*/

				// Now that we have written the new ("next") cluster configuration files to a known
				// location, kick the ONOS wrapper so it will do a HARD restart of ONOS, because ONOS
				// needs a HARD reset in order to come up properly, silly ONOS
				client := &http.Client{}
				log.Println("INFO: kicking ONOS to the curb")
				if req, err := http.NewRequest("GET", "http://127.0.0.1:4343/reset", nil); err == nil {
					if _, err := client.Do(req); err != nil {
						log.Printf("ERROR: unable to restart ONOS: %s\n", err)
					}
				}
			}
		}

		// Always return the join message; not the best form, but let's face it, the platform should know
		myIP, err := onosms.GetMyIP()
		if err != nil {
			myIP = "0.0.0.0"
		}
		b, err := json.Marshal(map[string]interface{}{
			"command": "peer-join",
			"data": map[string]interface{}{
				"ip": myIP,
			},
		})

		// This is a failsafe. I suppose we could just skip the previous and only do the failsafe, but the
		// above is cleaner
		if err != nil {
			fmt.Println("{\"command\":\"peer-join\",\"data\":{}}")
		} else {
			fmt.Println(string(b))
		}
	}
}
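// NOTE: main dispatches on path.Base(os.Args[0]), so this binary is expected to be installed once per
// hook name. A hypothetical installation sketch (the "onos-hooks" binary name and the paths are
// illustrative assumptions, not taken from this repository):
//
//	ln -s /usr/local/bin/onos-hooks /usr/local/bin/southbound-update
//	ln -s /usr/local/bin/onos-hooks /usr/local/bin/heartbeat
//	ln -s /usr/local/bin/onos-hooks /usr/local/bin/peer-status
//	ln -s /usr/local/bin/onos-hooks /usr/local/bin/peer-update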
// watchPods watches updates from kubernetes and updates the cluster information (and kicks ONOS) when
// membership changes.
func watchPods(kube string) {
	cluster := onosms.StringSet{}
	byName := map[string]string{}

	// The cluster set will always include myself.
	ip, err := onosms.GetMyIP()
	if err != nil {
		// Add loopback; may not be the best solution
		cluster.Add("127.0.0.1")
	} else {
		cluster.Add(ip)
	}

	// We are going to use an SSL transport with verification turned off
	timeout, err := time.ParseDuration(httpTimeout)
	if err != nil {
		log.Printf("ERROR: unable to parse default HTTP timeout of '%s', will default to no timeout\n", httpTimeout)
	}

	bearer := ""
	if b, err := ioutil.ReadFile(serviceAccountTokenFile); err == nil {
		log.Println("DEBUG: successfully read authentication bearer from secrets")
		bearer = string(b)
	} else {
		log.Printf("DEBUG: unable to read token file, '%s': %s\n", serviceAccountTokenFile, err)
	}

	var caCertPool *x509.CertPool
	var cacert []byte
	if cacert, err = ioutil.ReadFile(serviceAccountCACertFile); err == nil {
		log.Println("DEBUG: successfully read authentication ca certificate")
		caCertPool = x509.NewCertPool()
		if ok := caCertPool.AppendCertsFromPEM(cacert); !ok {
			log.Fatalln("Unable to load cert from file")
		} else {
			log.Printf("DEBUG: add certificate to pool: %s\n", string(cacert))
		}
	} else {
		log.Printf("DEBUG: unable to read ca certificate file, '%s': %s\n", serviceAccountCACertFile, err)
	}

	// If we have a bearer token and a cert then we should be using those as credentials; if not, we will
	// check the environment
	kubeCreds := ""
	if bearer == "" || caCertPool == nil {
		kubeUser := os.Getenv(kubeUserKey)
		kubePassword := os.Getenv(kubePasswordKey)
		log.Printf("DEBUG: ENVIRONMENT user '%s'\n", kubeUser)
		if kubeUser != "" {
			val, err := base64.StdEncoding.DecodeString(kubeUser)
			if err == nil {
				kubeUser = string(val)
				val, err = base64.StdEncoding.DecodeString(kubePassword)
				if err == nil {
					kubePassword = string(val)
					kubeCreds = kubeUser + ":" + kubePassword + "@"
				} else {
					log.Printf("ERROR: unable to decode password for kubernetes api: %s\n", err)
				}
			} else {
				log.Printf("ERROR: unable to decode username (%s) for kubernetes api: %s\n", kubeUser, err)
			}
		}
	}

	log.Printf("INFO: fetch cluster information from 'https://%s%s/api/v1/namespaces/default/pods?labelSelector=%s'\n",
		kubeCreds, kube, url.QueryEscape(kubeOnosSelector))
	req, err := http.NewRequest("GET",
		"https://"+kubeCreds+kube+"/api/v1/namespaces/default/pods?labelSelector="+url.QueryEscape(kubeOnosSelector), nil)
	if err != nil {
		log.Fatalf("ERROR: Unable to build http request to kubernetes API server: %s\n", err)
	}
	if len(bearer) > 0 {
		log.Printf("DEBUG: adding to header: %s %s\n", httpBearerHeader, bearer)
		req.Header.Add(httpAuthorizationHeader, httpBearerHeader+" "+bearer)
	}
	client := &http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{
				InsecureSkipVerify: true,
				RootCAs:            caCertPool,
			},
			TLSHandshakeTimeout:   timeout,
			ResponseHeaderTimeout: timeout,
		},
		Timeout: timeout,
	}
	resp, err := client.Do(req)
	if err != nil {
		log.Fatalf("ERROR: Unable to communicate to kubernetes to maintain cluster information: %s\n", err)
	}
	log.Printf("DEBUG: response back from kubernetes: %s\n", resp.Status)
	if resp.StatusCode/100 != 2 {
		log.Fatalf("ERROR: bad response code back from kubernetes: %s\n", resp.Status)
	}
	var data map[string]interface{}
	err = json.NewDecoder(resp.Body).Decode(&data)
	if err != nil {
		log.Fatalf("ERROR: Unable to parse response back from kubernetes: %s\n", err)
	}
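	// A hedged sketch of the pods-list response consumed below; only the fields queried via jsonq are
	// shown (the names follow the kubernetes v1 API, the values are illustrative):
	//
	//	{
	//	  "items": [
	//	    { "metadata": { "name": "onos-1" }, "status": { "podIP": "10.0.0.1" } },
	//	    { "metadata": { "name": "onos-2" }, "status": { "podIP": "10.0.0.2" } }
	//	  ]
	//	}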
	// Populate the cluster set with the base from the query
	jq := jsonq.NewQuery(data)
	items, err := jq.Array("items")
	if err != nil {
		log.Printf("ERROR: Unexpected response from kubernetes: %s\n", err)
	} else {
		modified := false
		for _, item := range items {
			jq = jsonq.NewQuery(item)
			ip, err = jq.String("status.podIP")
			if err == nil {
				if !cluster.Contains(ip) {
					cluster.Add(ip)
					modified = true
				}
				byName[jq.AsString("metadata.name")] = ip
			}
		}
		if modified {
			onosms.WriteClusterConfig(cluster.Array())
		} else {
			log.Println("INFO: no modification of cluster information based on update from kubernetes")
		}
	}
	log.Printf("INFO: base set of cluster members is %v\n", cluster.Array())
	b, _ := json.MarshalIndent(data, "", " ")
	log.Printf("DEBUG: %s\n", string(b))

	errCount := 0
	client.Timeout = 0
	for {
		req, err := http.NewRequest("GET",
			"https://"+kubeCreds+kube+"/api/v1/namespaces/default/pods?labelSelector="+url.QueryEscape(kubeOnosSelector)+"&watch=true", nil)
		if err != nil {
			errCount++
			if errCount > maxErrorCount {
				log.Fatalf("ERROR: Too many errors (%d) while attempting to build request to kubernetes: %s", errCount, err)
			}
			// Don't use a nil request; try again
			continue
		}
		if bearer != "" {
			log.Printf("DEBUG: adding to header: %s %s\n", httpBearerHeader, bearer)
			req.Header.Add(httpAuthorizationHeader, httpBearerHeader+" "+bearer)
		}
		resp, err := client.Do(req)
		if err != nil {
			errCount++
			if errCount > maxErrorCount {
				log.Fatalf("ERROR: Too many errors (%d) while attempting to communicate with kubernetes: %s", errCount, err)
			}
		} else {
			// Worked, reset error count
			errCount = 0
			decoder := json.NewDecoder(resp.Body)
			for {
				// Each decoded object is expected to be a watch event of the (illustrative) form:
				//	{ "type": "ADDED|MODIFIED|DELETED", "object": { "metadata": {...}, "status": {...} } }
				var data map[string]interface{}
				err := decoder.Decode(&data)
				if err == nil {
					log.Printf("DEBUG: cluster = %v\n", cluster)
					log.Printf("DEBUG: byName = %v\n", byName)
					b, _ := json.MarshalIndent(data, "", " ")
					log.Printf("DEBUG: retrieved: %v\n", string(b))
					jq := jsonq.NewQuery(data)
					name := jq.AsString("object.metadata.name")
					ip, err = jq.String("object.status.podIP")
					modified := false
					log.Printf("IP: (%s) %s == %s | %s\n", jq.AsString("type"), name, ip, byName[name])
					switch jq.AsString("type") {
					case "DELETED":
						if ip == "" {
							ip = byName[name]
						}
						if ip != "" {
							if cluster.Contains(ip) {
								cluster.Remove(ip)
								modified = true
							}
							delete(byName, name)
						} else {
							log.Printf("ERROR: Unable to determine podIP for pod being deleted: %s\n", err)
						}
					case "MODIFIED":
						fallthrough
					case "ADDED":
						if ip != "" {
							if !cluster.Contains(ip) {
								cluster.Add(ip)
								modified = true
							}
							byName[name] = ip
						} else {
							log.Println("INFO: Update without a podIP")
						}
					}
					if modified {
						onosms.WriteClusterConfig(cluster.Array())
					} else {
						log.Println("INFO: no modification of cluster information based on update from kubernetes")
					}
				} else {
					log.Printf("ERROR: unable to decode: %s\n", err)
					// The watch stream has likely ended; close it and re-establish the watch
					resp.Body.Close()
					break
				}
			}
		}
	}
}
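// A minimal usage sketch for watchPods, assuming the standard in-cluster environment variables
// identify the kubernetes API server; how the rest of this program actually wires this up is not
// shown here, so treat the call below as illustrative:
//
//	kube := os.Getenv("KUBERNETES_SERVICE_HOST") + ":" + os.Getenv("KUBERNETES_SERVICE_PORT")
//	go watchPods(kube)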