/*
 *	Handle a DELETE /tegu/mirrors/<name>/[?cookie=<cookie>] request.
 */
func mirror_delete(in *http.Request, out http.ResponseWriter, data []byte) (code int, msg string) {
	name, cookie := getNameAndCookie(in)
	mirror := lookupMirror(name, cookie)
	if mirror == nil {
		code = http.StatusNotFound
		msg = "Not found."
		return
	}
	if !mirror.Is_valid_cookie(&cookie) {
		code = http.StatusUnauthorized
		msg = "Unauthorized."
		return
	}

	req := ipc.Mk_chmsg()
	my_ch := make(chan *ipc.Chmsg)		// allocate channel for responses to our requests
	defer close(my_ch)					// close it on return

	namepluscookie := []*string{&name, &cookie}
	req.Send_req(rmgr_ch, my_ch, REQ_DEL, namepluscookie, nil)	// remove the reservation
	req = <-my_ch						// wait for completion

	if req.State == nil {
		ckptreq := ipc.Mk_chmsg()
		ckptreq.Send_req(rmgr_ch, nil, REQ_CHKPT, nil, nil)		// request a chkpt now, but don't wait on it
	}

	code = http.StatusNoContent
	msg = ""
	return
}
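/*
	The exchange above is the request/response pattern used throughout these
	functions: allocate a Chmsg, send it with a private response channel, then
	block reading that channel. A minimal sketch of the pattern in isolation;
	req_ch, REQ_EXAMPLE and the payload are illustrative assumptions, not Tegu
	names:
*/
func sync_request_sketch(req_ch chan *ipc.Chmsg, payload interface{}) (interface{}, error) {
	my_ch := make(chan *ipc.Chmsg)		// private channel for the single response
	defer close(my_ch)					// the requester owns and closes the channel

	req := ipc.Mk_chmsg()
	req.Send_req(req_ch, my_ch, REQ_EXAMPLE, payload, nil)	// REQ_EXAMPLE is a hypothetical message type
	req = <-my_ch											// block until the manager answers

	return req.Response_data, req.State	// State is nil on success
}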
/*
	Send a passthrough flowmod generation request to the agent manager. This is
	basically taking the struct that the reservation manager filled in and
	converting it to a map.

	Yes, this probably _could_ be pushed up into the reservation manager and sent
	from there to the agent manager, but for now, since the ip2mac information is
	local to fq-mgr, we'll keep it here. (That info is local to fq-mgr b/c in the
	original Tegu it came straight in from skoogi and it was fq-mgr's job to
	interface with skoogi.)
*/
func send_pt_fmods(data *Fq_req, ip2mac map[string]*string, phost_suffix *string) {
	if *data.Swid == "" {						// we must have a switch name to set passthrough fmods
		fq_sheep.Baa(1, "unable to send passthrough fmod request to agent: no switch defined in input data")
		return
	}

	host := data.Swid
	if phost_suffix != nil {					// we need to add the physical host suffix
		host = add_phost_suffix(host, phost_suffix)
	}

	if data.Match.Smac != nil {					// caller can pass in IP and we'll convert it
		if ip2mac[*data.Match.Smac] != nil {
			data.Match.Smac = ip2mac[*data.Match.Smac]	// res-mgr thinks in IP, flow-mods need mac; convert
		}
	}

	msg := &agent_cmd{Ctype: "action_list"}		// create a message for agent manager to send to an agent
	msg.Actions = make([]action, 1)				// just a single action
	msg.Actions[0].Atype = "passthru"			// set all related passthrough flow-mods
	msg.Actions[0].Hosts = make([]string, 1)	// passthrough flow-mods created on just one host
	msg.Actions[0].Hosts[0] = *host
	msg.Actions[0].Data = data.To_pt_map()		// convert useful data from caller into parms for agent

	json, err := json.Marshal(msg)				// bundle into a json string
	if err != nil {
		fq_sheep.Baa(0, "unable to build json to set passthrough flow-mods")
	} else {
		tmsg := ipc.Mk_chmsg()
		tmsg.Send_req(am_ch, nil, REQ_SENDSHORT, string(json), nil)	// send as a short request to one agent
		fq_sheep.Baa(2, "passthru flow-mod request sent to agent manager: %s", json)
	}
}
/*
	Send a bandwidth endpoint flow-mod request to the agent manager. This is
	little more than a wrapper that converts the fq_req into an agent request.
	The ultimate agent action is to put in all needed flow-mods on an endpoint
	host in one go, so no need for individual requests for each and no need for
	tegu to understand the actual flow-mod mechanics any more.

	Yes, this probably _could_ be pushed up into the reservation manager and sent
	from there to the agent manager, but for now, since the ip2mac information is
	local to fq-mgr, we'll keep it here. (That info is local to fq-mgr b/c in the
	original Tegu it came straight in from skoogi and it was fq-mgr's job to
	interface with skoogi.)
*/
func send_bw_fmods(data *Fq_req, ip2mac map[string]*string, phost_suffix *string) {
	if data.Espq.Switch == "" {					// we must have a switch name to set bandwidth fmods
		fq_sheep.Baa(1, "unable to send bw-fmods request to agent: no switch defined in input data")
		return
	}

	host := &data.Espq.Switch					// Espq.Switch has real name (host) of switch
	if phost_suffix != nil {					// we need to add the physical host suffix
		host = add_phost_suffix(host, phost_suffix)
	}

	data.Match.Smac = ip2mac[*data.Match.Ip1]	// res-mgr thinks in IP, flow-mods need mac; convert
	data.Match.Dmac = ip2mac[*data.Match.Ip2]	// add to data for To_bw_map() call later

	msg := &agent_cmd{Ctype: "action_list"}		// create a message for agent manager to send to an agent
	msg.Actions = make([]action, 1)				// just a single action
	msg.Actions[0].Atype = "bw_fmod"			// set all related bandwidth flow-mods for an endpoint
	msg.Actions[0].Hosts = make([]string, 1)	// bw endpoint flow-mods created on just one host
	msg.Actions[0].Hosts[0] = *host
	msg.Actions[0].Data = data.To_bw_map()		// convert useful data from caller into parms for agent

	json, err := json.Marshal(msg)				// bundle into a json string
	if err != nil {
		fq_sheep.Baa(0, "unable to build json to set flow mod")
	} else {
		tmsg := ipc.Mk_chmsg()
		tmsg.Send_req(am_ch, nil, REQ_SENDSHORT, string(json), nil)	// send as a short request to one agent
		fq_sheep.Baa(2, "bandwidth endpoint flow-mod request sent to agent manager: %s", json)
	}
}
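/*
	For orientation, the wire format built above is a single-action
	"action_list". This self-contained sketch mirrors the assumed shape of
	agent_cmd/action with local types and prints a sample message; the field
	values are invented and the real data map comes from To_bw_map():
*/
func sample_bw_action_list() {
	type sk_action struct {
		Atype string            `json:"atype"`
		Hosts []string          `json:"hosts"`
		Data  map[string]string `json:"data"`
	}
	type sk_agent_cmd struct {
		Ctype   string      `json:"ctype"`
		Actions []sk_action `json:"actions"`
	}

	msg := sk_agent_cmd{
		Ctype: "action_list",
		Actions: []sk_action{
			{Atype: "bw_fmod", Hosts: []string{"phost42"}, Data: map[string]string{"smac": "fa:16:3e:00:00:01"}},
		},
	}
	if j, err := json.Marshal(msg); err == nil {
		fmt.Printf("%s\n", j)	// {"ctype":"action_list","actions":[{"atype":"bw_fmod","hosts":["phost42"],"data":{...}}]}
	}
}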
/* * Push a "delete mirror" request out to an agent in order to remove the mirror. */ func undo_mirror_reservation(gp *gizmos.Pledge, rname string, ch chan *ipc.Chmsg) { p, ok := (*gp).(*gizmos.Pledge_mirror) // better be a mirroring pledge if !ok { rm_sheep.Baa(1, "internal error: pledge passed to undo_mirror_reservations wasn't a mirror pledge") (*gp).Set_pushed() // prevent looping until it expires return } id := p.Get_id() // This is somewhat of a hack, but as long as the code in tegu_agent:do_mirrorwiz doesn't change, it should work arg := *id opts := p.Get_Options() if opts != nil && *opts != "" { arg = fmt.Sprintf("-o%s %s", *opts, *id) } host := p.Get_qid() rm_sheep.Baa(1, "Deleting mirror %s on host %s", *id, *host) json := `{ "ctype": "action_list", "actions": [ { ` json += `"atype": "mirrorwiz", ` json += fmt.Sprintf(`"hosts": [ %q ], `, *host) json += fmt.Sprintf(`"qdata": [ "del", %q ] `, arg) json += `} ] }` rm_sheep.Baa(2, " JSON -> %s", json) msg := ipc.Mk_chmsg() msg.Send_req(am_ch, nil, REQ_SENDSHORT, json, nil) // send this as a short request to one agent p.Set_pushed() }
/*
	Check the two pledges (old, new) to see if the related physical hosts have
	moved. Returns true if the physical hosts have changed. We get the current
	physical location for the hosts from the network based on the new pledge,
	and look at the path of the old pledge to see if they are the same as what
	was captured in the original path. We return true if they are different.
*/
func phosts_changed( old *gizmos.Pledge, new *gizmos.Pledge ) ( bool ) {
	var (
		p2 *string = nil
	)

	if old == nil || new == nil {
		return false
	}

	a1, a2 := (*new).Get_hosts( )						// get hosts from the new pledge
	ch := make( chan *ipc.Chmsg )						// do not close -- senders close channels
	req := ipc.Mk_chmsg( )
	req.Send_req( nw_ch, ch, REQ_GETPHOST, a1, nil )	// xlate hostnames to physical host location
	req = <- ch											// wait for response from network
	p1 := req.Response_data.( *string )

	if a2 != nil {
		if len( *a2 ) > 1 && (*a2)[0:1] != "!" {		// names starting with ! aren't known, don't map
			req.Send_req( nw_ch, ch, REQ_GETPHOST, a2, nil )
			req = <- ch
			if req.Response_data != nil {				// for an external address this will be unknown
				p2 = req.Response_data.( *string )
			}
		}
	}

	return !(*old).Same_anchors( p1, p2 )				// same anchors means unchanged, so invert for "changed"
}
/*
	Builds one setqueue json request per host and sends it to the agent. If
	there are multiple agents attached, the individual messages will be fanned
	out across the available agents; otherwise the agent will just process them
	sequentially, which would be the case if we put all hosts into the same
	message.

	This now augments the switch name with the suffix; needs to be fixed for
	q-full so that it handles intermediate names properly. In the world with the
	ssh-broker, there is no -h host on the command line and the script's view of
	the host name might not have the suffix that we are supplied with. To
	prevent the script from not recognising an entry, we must now put an entry
	for both the host name and hostname+suffix into the list.
*/
func adjust_queues_agent(qlist []string, hlist *string, phsuffix *string) {
	var (
		qjson     string	// final full json blob
		qjson_pfx string	// static prefix
		sep = ""
	)

	target_hosts := make(map[string]bool)		// hosts that are actually affected by the queue list
	if phsuffix != nil {						// need to convert the host names in the list to have suffix
		nql := make([]string, len(qlist)*2)		// need one for each possible host name
		offset := len(qlist)					// put the originals into the second half of the array

		for i := range qlist {
			nql[offset+i] = qlist[i]						// just copy the original
			toks := strings.SplitN(qlist[i], "/", 2)		// split host from front
			if len(toks) == 2 {
				nh := add_phost_suffix(&toks[0], phsuffix)	// add the suffix
				nql[i] = *nh + "/" + toks[1]
				target_hosts[*nh] = true
			} else {
				nql[i] = qlist[i]
				fq_sheep.Baa(1, "target host not snarfed: %s", qlist[i])
			}
		}

		qlist = nql
	} else {									// just snarf the list of hosts affected
		for i := range qlist {
			toks := strings.SplitN(qlist[i], "/", 2)		// split host from front
			if len(toks) == 2 {
				target_hosts[toks[0]] = true
			}
		}
	}

	fq_sheep.Baa(1, "adjusting queues: sending %d queue setting items to agents", len(qlist))

	qjson_pfx = `{ "ctype": "action_list", "actions": [ { "atype": "setqueues", "qdata": [ `
	for i := range qlist {
		fq_sheep.Baa(2, "queue info: %s", qlist[i])
		qjson_pfx += fmt.Sprintf("%s%q", sep, qlist[i])
		sep = ", "
	}
	qjson_pfx += ` ], "hosts": [ `

	sep = ""
	for h := range target_hosts {				// build one request per host and send to agents -- multiple agents then these will fan out
		qjson = qjson_pfx						// seed the next request with the constant prefix
		qjson += fmt.Sprintf("%s%q", sep, h)
		qjson += ` ] } ] }`

		fq_sheep.Baa(2, "queue update: host=%s %s", h, qjson)

		tmsg := ipc.Mk_chmsg()
		tmsg.Send_req(am_ch, nil, REQ_SENDSHORT, qjson, nil)	// send this as a short request to one agent
	}
}
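/*
	The per-host message assembled above is plain JSON. A sketch that builds one
	such request for a single host with invented queue strings, so the wire
	shape is easy to see:
*/
func sample_setqueues_json() string {
	qlist := []string{"phost1/fa:de:ad:00:00:01,10000000", "phost1/fa:de:ad:00:00:02,20000000"}	// invented entries

	qjson := `{ "ctype": "action_list", "actions": [ { "atype": "setqueues", "qdata": [ `
	sep := ""
	for _, q := range qlist {
		qjson += fmt.Sprintf("%s%q", sep, q)
		sep = ", "
	}
	qjson += ` ], "hosts": [ "phost1" ] } ] }`
	return qjson
}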
/* * Push an "add mirror" request out to an agent in order to create the mirror. */ func push_mirror_reservation(gp *gizmos.Pledge, rname string, ch chan *ipc.Chmsg) { p, ok := (*gp).(*gizmos.Pledge_mirror) // better be a mirroring pledge if !ok { rm_sheep.Baa(1, "internal error: pledge passed to push_mirror_reservations wasn't a mirror pledge") (*gp).Set_pushed() // prevent looping until it expires return } ports, out, _, _, _, _, _, _ := p.Get_values() ports2 := strings.Replace(*ports, " ", ",", -1) // ports must be comma separated id := p.Get_id() host := p.Get_qid() rm_sheep.Baa(1, "Adding mirror %s on host %s", *id, *host) json := `{ "ctype": "action_list", "actions": [ { ` json += `"atype": "mirrorwiz", ` json += fmt.Sprintf(`"hosts": [ %q ], `, *host) if strings.Contains(ports2, ",vlan:") { // Because we have to store the ports list and the vlans in the same field // we split it out here n := strings.Index(ports2, ",vlan:") vlan := ports2[n+6:] ports2 = ports2[:n] json += fmt.Sprintf(`"qdata": [ "add", %q, %q, %q, %q ] `, *id, ports2, *out, vlan) } else { json += fmt.Sprintf(`"qdata": [ "add", %q, %q, %q ] `, *id, ports2, *out) } json += `} ] }` rm_sheep.Baa(2, " JSON -> %s", json) msg := ipc.Mk_chmsg() msg.Send_req(am_ch, nil, REQ_SENDSHORT, json, nil) // send this as a short request to one agent p.Set_pushed() }
/*
 *	Validate a port.
 */
func validatePort(port *string) (vm *Net_vm, err error) {
	// handle mac:port form
	if strings.HasPrefix(*port, "mac:") {
		// Map the port MAC to a phost
		mac := (*port)[4:]
		my_ch := make(chan *ipc.Chmsg)
		defer close(my_ch)

		req := ipc.Mk_chmsg()
		req.Send_req(nw_ch, my_ch, REQ_GET_PHOST_FROM_MAC, &mac, nil)	// request MAC -> phost translation
		req = <-my_ch
		if req.Response_data == nil {
			err = fmt.Errorf("cannot find MAC: %s", mac)
		} else {
			vm = Mk_netreq_vm(nil, nil, nil, nil, req.Response_data.(*string), &mac, nil, nil, nil)	// only use the two fields
			http_sheep.Baa(1, "name=NIL id=NIL ip4=NIL phost=%s mac=%s gw=NIL fip=NIL", safe(vm.phost), safe(vm.mac))
		}
		return
	}

	// handle project/host form
	my_ch := make(chan *ipc.Chmsg)		// allocate channel for responses to our requests
	defer close(my_ch)

	req := ipc.Mk_chmsg()
	req.Send_req(osif_ch, my_ch, REQ_GET_HOSTINFO, port, nil)	// request data
	req = <-my_ch
	if req.Response_data != nil {
		vm = req.Response_data.(*Net_vm)
		if vm.phost == nil {
			// There seems to be a bug in REQ_GET_HOSTINFO, such that the 2nd call works;
			// wanting to capture this more aggressively since I cannot reproduce the first time failure
			http_sheep.Baa(1, "requiring a second osif lazy call: port=%s name=%s id=%s ip4=%s phost=%s mac=%s gw=%s fip=%s",
				safe(port), safe(vm.name), safe(vm.id), safe(vm.ip4), safe(vm.phost), safe(vm.mac), safe(vm.gw), safe(vm.fip))

			req.Send_req(osif_ch, my_ch, REQ_GET_HOSTINFO, port, nil)
			req = <-my_ch
			vm = req.Response_data.(*Net_vm)
			err = req.State
		}
		http_sheep.Baa(1, "name=%s id=%s ip4=%s phost=%s mac=%s gw=%s fip=%s",
			safe(vm.name), safe(vm.id), safe(vm.ip4), safe(vm.phost), safe(vm.mac), safe(vm.gw), safe(vm.fip))
	} else {
		if req.State != nil {
			err = req.State
		}
	}
	return
}
/*
	Deal with incoming data from an agent. We add the buffer to the cache (all
	input is expected to be json) and attempt to pull a blob of json from the
	cache. If the blob is pulled, then we act on it, else we assume another
	buffer or more will be coming to complete the blob and we'll do it next
	time round.
*/
func (a *agent) process_input(buf []byte) {
	var (
		req agent_msg	// unpacked message struct
	)

	a.jcache.Add_bytes(buf)
	jblob := a.jcache.Get_blob()			// get next blob if ready
	for jblob != nil {
		err := json.Unmarshal(jblob, &req)	// unpack the json

		if err != nil {
			am_sheep.Baa(0, "ERR: unable to unpack agent_message: %s  [TGUAGT000]", err)
			am_sheep.Baa(2, "offending json: %s", string(buf))
		} else {
			am_sheep.Baa(1, "%s/%s received from agent", req.Ctype, req.Rtype)

			switch req.Ctype {				// "command type"
				case "response":			// response to a request
					if req.State == 0 {
						switch req.Rtype {
							case "map_mac2phost":
								msg := ipc.Mk_chmsg()
								msg.Send_req(nw_ch, nil, REQ_MAC2PHOST, req.Rdata, nil)	// send into network manager -- we don't expect a response

							default:
								am_sheep.Baa(2, "WRN: success response data from agent was ignored for: %s  [TGUAGT001]", req.Rtype)
								if am_sheep.Would_baa(2) {
									am_sheep.Baa(2, "first few ignored messages from response:")
									for i := 0; i < len(req.Rdata) && i < 10; i++ {
										am_sheep.Baa(2, "[%d] %s", i, req.Rdata[i])
									}
								}
						}
					} else {
						switch req.Rtype {
							case "bwow_fmod":
								am_sheep.Baa(1, "ERR: oneway bandwidth flow-mod failed; check agent logs for details  [TGUAGT006]")
								for i := 0; i < len(req.Rdata) && i < 20; i++ {
									am_sheep.Baa(1, " [%d] %s", i, req.Rdata[i])
								}

							default:
								am_sheep.Baa(1, "WRN: response messages for failed command were not interpreted: %s  [TGUAGT002]", req.Rtype)
								for i := 0; i < len(req.Rdata) && i < 20; i++ {
									am_sheep.Baa(2, " [%d] %s", i, req.Rdata[i])
								}
						}
					}

				default:
					am_sheep.Baa(1, "WRN: unrecognised command type from agent: %s  [TGUAGT003]", req.Ctype)
			}
		}

		jblob = a.jcache.Get_blob()			// get next blob if the buffer completed one and contained a second
	}

	return
}
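/*
	The read loop above depends on the json cache to reassemble complete blobs
	from arbitrarily fragmented reads. The add-then-drain pattern in isolation;
	the interface is a stand-in for whatever a.jcache provides (assumed
	Add_bytes/Get_blob semantics) and the callback is illustrative:
*/
type blob_cache interface {
	Add_bytes(buf []byte)
	Get_blob() []byte	// nil until a complete json blob is available
}

func drain_blobs(jc blob_cache, buf []byte, handle func([]byte)) {
	jc.Add_bytes(buf)	// append the new fragment
	for blob := jc.Get_blob(); blob != nil; blob = jc.Get_blob() {
		handle(blob)	// act on each complete blob; partial data stays cached
	}
}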
/*
	A wrapper allowing a user thread to unregister with a function call rather
	than having to send a message to the dispatcher.
*/
func Unregister(band string, ch chan *Envelope) {
	reg := &Reg_msg{
		band: band,
		ch:   ch,
	}

	msg := ipc.Mk_chmsg()
	msg.Send_req(disp_ch, nil, UNREGISTER, reg, nil)	// send the registration to dispatcher for processing
}
/*
	A wrapper allowing a user thread to register with a function call rather
	than having to send a message to the dispatcher.
*/
func Register(band string, ch chan *Envelope, ldata interface{}) {
	reg := &Reg_msg{
		band:  band,
		ldata: ldata,
		ch:    ch,
	}

	msg := ipc.Mk_chmsg()
	msg.Send_req(disp_ch, nil, REGISTER, reg, nil)		// send the registration to dispatcher for processing
}
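/*
	Typical use from a listener goroutine: register a band, range over the
	envelope channel, and unregister on the way out. The band name and the
	handling here are illustrative only:
*/
func listen_sketch(work func(*Envelope)) {
	ch := make(chan *Envelope, 10)	// small buffer so the dispatcher is never blocked for long
	Register("events", ch, nil)		// no listener data
	defer Unregister("events", ch)

	for env := range ch {			// runs until the channel is closed
		work(env)
	}
}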
/*
	Deal with input from the other side sent to the http url. This function is
	invoked directly by the http listener and as such we get no 'user data' so
	we rely on globals in order to be able to function. (There might be a way
	to deal with this using a closure, but I'm not taking the time to go down
	that path until other more important things are implemented.)

	We assume that the body contains one complete json struct which might
	contain several messages.

	This is invoked as a goroutine by the http environment and when this
	returns the session to the requestor is closed. So, we must ensure that we
	block until all output has been sent to the session before we return. We do
	this by creating a channel and waiting on a single message on that channel.
	The channel is passed in the datablock. Once we have the message, then we
	return.
*/
func deal_with(out http.ResponseWriter, in *http.Request) {
	var (
		state string = "ERROR"
		msg   string
	)

	out.Header().Set("Content-Type", "application/json")	// announce that everything out of this is json
	out.WriteHeader(http.StatusOK)							// if we dealt with it, then it's always OK; request errors are reported in the output

	sheep.Baa(2, "dealing with a request")
	data_blk := &Data_block{}
	err := dig_data(in, data_blk)
	if err != nil {											// missing or bad data -- punt early
		sheep.Baa(1, "msgrtr/http: missing or badly formatted data: %s: %s", in.Method, err)
		fmt.Fprintf(out, `{ "status": "ERROR", "comment": "missing or badly formatted data: %s" }`, err)	// error stuff back to user
		return
	}

	switch in.Method {
		case "PUT":
			msg = "PUT requests are unsupported"

		case "POST":
			sheep.Baa(2, "deal_with called for post")
			if len(data_blk.Events) <= 0 {
				sheep.Baa(1, "data block has no events?????")
			} else {
				data_blk.out = out
				data_blk.rel_ch = make(chan int, 1)
				state = "OK"
				sheep.Baa(2, "data: type=%s", data_blk.Events[0].Event_type)
				req := ipc.Mk_chmsg()
				req.Send_req(disp_ch, nil, RAW_BLOCK, data_blk, nil)	// pass to dispatcher to process
				<-data_blk.rel_ch										// wait on the dispatcher to signal ok to go on; we don't care what comes back
			}

		case "DELETE":
			msg = "DELETE requests are unsupported"

		case "GET":
			msg = "GET requests are unsupported"

		default:
			sheep.Baa(1, "deal_with called for unrecognised method: %s", in.Method)
			msg = fmt.Sprintf("unrecognised method: %s", in.Method)
	}

	if state == "ERROR" {
		fmt.Fprintf(out, `{ "endstate": { "status": %q, "comment": %q } }`, state, msg)	// send back a failure/error state
	}
}
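/*
	The block-until-released trick above matters because the http package
	closes the session as soon as the handler returns. Stripped to its
	essentials (the worker channel here is a stand-in for the dispatcher):
*/
func hold_session_sketch(out http.ResponseWriter, work chan *Data_block) {
	blk := &Data_block{}
	blk.out = out
	blk.rel_ch = make(chan int, 1)	// buffered: the releaser never blocks
	work <- blk						// hand the block to the worker goroutine
	<-blk.rel_ch					// hold the session open until the worker signals done
}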
/*
 *	Given a name, find the mirror that goes with the name.
 */
func lookupMirror(name string, cookie string) (mirror *gizmos.Pledge_mirror) {
	req := ipc.Mk_chmsg()
	my_ch := make(chan *ipc.Chmsg)	// allocate channel for responses to our requests
	defer close(my_ch)

	req.Send_req(rmgr_ch, my_ch, REQ_GET, []*string{&name, &cookie}, nil)
	req = <-my_ch
	if req.State == nil {
		mi := req.Response_data.(*gizmos.Pledge)	// assert to interface pointer
		mirror = (*mi).(*gizmos.Pledge_mirror)		// assert to correct type
	}
	return
}
/*
	For a single passthrough pledge, this function sets things up and sends
	needed requests to the fq-manager to create any necessary flow-mods.

	We send the following information to fq_mgr:
		source mac or endpoint (VM -- the host in the pledge)
		source IP, and optionally port and protocol, for more specific reservations
		expiry
		switch (physical host -- compute node)

	Errors are returned to res_mgr via channel, but asynchronously; we do not
	wait for responses to each message generated here.

	To_limit is a cap to the expiration time sent when creating a flow-mod.
	OVS (and others we assume) use an unsigned 16 bit value as a hard timeout,
	and thus have an upper limit of just over 18 hours. If to_limit is > 0,
	we'll ensure that the timeout passed on the request to fq-mgr won't exceed
	the limit, and we assume that this function is called periodically to
	update long running reservations.
*/
func pass_push_res(gp *gizmos.Pledge, rname *string, ch chan *ipc.Chmsg, to_limit int64) {
	var (
		msg *ipc.Chmsg
	)

	now := time.Now().Unix()

	p, ok := (*gp).(*gizmos.Pledge_pass)	// generic pledge better be a passthrough pledge!
	if !ok {
		rm_sheep.Baa(1, "internal error in pass_push_reservation: pledge isn't a passthrough pledge")
		(*gp).Set_pushed()					// prevent looping
		return
	}

	host, _, _, expiry, proto := p.Get_values()	// reservation info that we need

	ip := name2ip(host)

	if ip != nil {							// good ip address so we're good to go
		freq := Mk_fqreq(rname)				// default flow mod request with empty match/actions (for bw requests, we don't need priority or such things)

		freq.Match.Smac = ip				// fq_mgr has conversion map to convert to mac
		freq.Swid = p.Get_phost()			// the physical host where the VM lives and where fmods need to be deposited
		freq.Cookie = 0xffff				// should be ignored; if we see this out there we've got problems

		if (*p).Is_paused() {
			freq.Expiry = time.Now().Unix() + 15	// if reservation shows paused, then we set the expiration to 15s from now which should force the flow-mods out
		} else {
			if to_limit > 0 && expiry > now+to_limit {
				freq.Expiry = now + to_limit		// expiry must be capped so as not to overflow virtual switch variable size
			} else {
				freq.Expiry = expiry
			}
		}
		freq.Id = rname

		freq.Extip = &empty_str				// this will change when ported to endpoint branch as the endpoint allows address and port 'in line'
		freq.Match.Ip1 = proto				// the proto on the reservation should be [{udp|tcp:}]address[:port]
		freq.Match.Ip2 = nil
		freq.Espq = nil
		dup_str := ""
		freq.Exttyp = &dup_str

		rm_sheep.Baa(1, "pushing passthru reservation: %s", p)
		msg = ipc.Mk_chmsg()
		msg.Send_req(fq_ch, ch, REQ_PT_RESERVE, freq, nil)	// queue work with fq-manager to read the struct and send cmd(s) to agent to get it done

		p.Set_pushed()						// safe to mark the pledge as having been pushed
	}
}
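/*
	The expiry cap is simple arithmetic but easy to misread inline: with
	to_limit of 64800 (18h), a reservation expiring 24h out is pushed with
	now+64800 and relies on the periodic refresh to re-push it later. The rule
	in isolation:
*/
func cap_expiry(expiry int64, to_limit int64) int64 {
	now := time.Now().Unix()
	if to_limit > 0 && expiry > now+to_limit {
		return now + to_limit	// don't overflow the switch's hard-timeout field
	}
	return expiry
}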
/*
 *	Return a string array of mirror names in the reservation cache.
 */
func getMirrors() []string {
	req := ipc.Mk_chmsg()
	my_ch := make(chan *ipc.Chmsg)	// allocate channel for responses to our requests
	defer close(my_ch)

	req.Send_req(rmgr_ch, my_ch, REQ_GET_MIRRORS, nil, nil)	// ask the reservation manager for the list of mirror names
	req = <-my_ch
	if req.State == nil {
		rv := *req.Response_data.(*string)
		return strings.Split(rv, " ")
	}
	return []string{}
}
/*
	Set the user link capacity and forward it on to the network manager. We
	expect this to be a request from the far side (user/admin) or read from the
	chkpt file, so the value is passed as a string (which is also what the
	network manager wants).
*/
func (inv *Inventory) add_ulcap( name *string, sval *string ) {
	val := clike.Atoi( *sval )

	pdata := make( []*string, 2 )		// parameters for message to network
	pdata[0] = name
	pdata[1] = sval

	if val >= 0 && val < 101 {
		rm_sheep.Baa( 2, "adding user cap: %s %d", *name, val )
		inv.ulcap_cache[*name] = val

		req := ipc.Mk_chmsg( )
		req.Send_req( nw_ch, nil, REQ_SETULCAP, pdata, nil )		// push into the network environment
	} else {
		if val == -1 {
			delete( inv.ulcap_cache, *name )

			req := ipc.Mk_chmsg( )
			req.Send_req( nw_ch, nil, REQ_SETULCAP, pdata, nil )	// push into the network environment
		} else {
			rm_sheep.Baa( 1, "user link capacity not set: %d is out of range (0-100, or -1 to delete)", val )
		}
	}
}
/*
	Deprecated -- these should no longer be set by tegu, and if really needed
	should be set by the ql_bw*fmods and other agent scripts.

	Push table 9x flow-mods. The flowmods we toss into the 90 range of tables
	generally serve to mark metadata in a packet since metadata cannot be
	marked prior to a resub action (flaw in OVS if you ask me). Marking
	metadata is needed so that when one of our f-mods match we can resubmit
	into table 0 without triggering a loop, or a match of any of our other
	rules.

	Table is the table number (we assume 9x, but it could be anything).
	Meta is a string supplying the value/mask that is used on the action (e.g.
	0x02/0x02) to set the 00000010 bit as an and operation.
	Cookie is the cookie value used on the f-mod.
*/
func table9x_fmods( rname *string, host string, table int, meta string, cookie int ) {
	fq_data := Mk_fqreq( rname )					// f-mod request with defaults (output==none)
	fq_data.Table = table
	fq_data.Cookie = cookie
	fq_data.Expiry = 0								// never expire

	// CAUTION: fq_mgr generic fmod needs to be changed and when it does these next three lines will need to change too
	fq_data.Espq = gizmos.Mk_spq( host, -1, -1 )	// send to specific host
	dup_str := "br-int"								// these go to br-int only
	fq_data.Swid = &dup_str

	fq_data.Action.Meta = &meta						// sole purpose is to set metadata

	msg := ipc.Mk_chmsg()
	msg.Send_req( fq_ch, nil, REQ_GEN_FMOD, fq_data, nil )	// no response right now -- eventually we want an asynch error
}
/*
	Not to be confused with send_meta_fmods in res_mgr. This needs to be
	extended such that resmgr can just send fq-mgr a request to invoke this.
*/
func send_meta_fm(hlist []string, table int, cookie int, pattern string) {
	tmsg := ipc.Mk_chmsg()

	msg := &agent_cmd{Ctype: "action_list"}	// create an agent message
	msg.Actions = make([]action, 1)
	msg.Actions[0].Atype = "flowmod"
	msg.Actions[0].Hosts = hlist
	msg.Actions[0].Fdata = make([]string, 1)
	msg.Actions[0].Fdata[0] = fmt.Sprintf(`-T %d -I -t 0 --match --action -m %s -N add 0x%x br-int`, table, pattern, cookie)

	json, err := json.Marshal(msg)			// bundle into a json string
	if err != nil {
		fq_sheep.Baa(0, "steer: unable to build json to set meta flow mod")
	} else {
		fq_sheep.Baa(2, "meta json: %s", json)
		tmsg.Send_req(am_ch, nil, REQ_SENDSHORT, string(json), nil)	// send as a short request to one agent
	}
}
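/*
	To make the Fdata command concrete: for table 94, pattern "0x02/0x02" and
	cookie 0xdead (all invented values), the string built above expands as the
	comment shows:
*/
func sample_meta_fdata() {
	table, cookie, pattern := 94, 0xdead, "0x02/0x02"
	fmt.Printf("-T %d -I -t 0 --match --action -m %s -N add 0x%x br-int\n", table, pattern, cookie)
	// output: -T 94 -I -t 0 --match --action -m 0x02/0x02 -N add 0xdead br-int
}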
/*
	Given a name, send a request to the network manager to translate it to an
	IP address. If the name is nil or empty, we return nil. This is legit for
	steering in the case of L* endpoint specification.
*/
func name2ip(name *string) (ip *string) {
	ip = nil

	if name == nil || *name == "" {
		return
	}

	ch := make(chan *ipc.Chmsg)
	defer close(ch)					// close it on return

	msg := ipc.Mk_chmsg()
	msg.Send_req(nw_ch, ch, REQ_GETIP, name, nil)
	msg = <-ch
	if msg.State == nil {			// success
		ip = msg.Response_data.(*string)
	}
	return
}
/*
	Given a name, get host info (IP, mac, switch-id, switch-port) from the
	network manager.
*/
func get_hostinfo( name *string ) ( *string, *string, *string, int ) {
	if name != nil && *name != "" {
		ch := make( chan *ipc.Chmsg )
		req := ipc.Mk_chmsg( )
		req.Send_req( nw_ch, ch, REQ_HOSTINFO, name, nil )		// get host info string (mac, ip, switch)
		req = <- ch
		if req.State == nil {
			htoks := strings.Split( req.Response_data.( string ), "," )	// results are: ip, mac, switch-id, switch-port; all strings
			return &htoks[0], &htoks[1], &htoks[2], clike.Atoi( htoks[3] )
		}

		rm_sheep.Baa( 1, "get_hostinfo: error from network mgr: %s", req.State )
		return nil, nil, nil, 0
	}

	rm_sheep.Baa( 1, "get_hostinfo: no name provided" )
	return nil, nil, nil, 0
}
/*
	Send a reply back to the http requestor. This is a wrapper that puts a
	request on the dispatcher queue so that we serialise the access to the
	underlying data block. Status is presumed to be OK or ERROR or somesuch.
	Msg is any string that is a 'comment' and data is json or other data (not
	quoted in the output).
*/
func (e *Event) Reply(state string, msg string, data string) {
	e.mu.Lock()
	if e.ack_sent {
		e.mu.Unlock()
		return
	}
	e.ack_sent = true	// set now then release the lock; no need to hold others while we write
	e.mu.Unlock()

	if data != "" {
		e.msg = fmt.Sprintf(`{ "endstate": { "status": %q, "comment": %q, "data": %s } }`, state, msg, data)
	} else {
		e.msg = fmt.Sprintf(`{ "endstate": { "status": %q, "comment": %q } }`, state, msg)
	}

	cmsg := ipc.Mk_chmsg()
	cmsg.Send_req(disp_ch, nil, SEND_ACK, e, nil)	// queue the event for a reply
}
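/*
	The mutex dance in Reply is a hand-rolled "only the first ack wins" guard.
	The same guarantee in isolation, as a reusable sketch (uses sync.Mutex;
	not a Tegu type):
*/
type once_ack struct {
	mu   sync.Mutex
	sent bool
}

func (a *once_ack) try_ack() bool {
	a.mu.Lock()
	defer a.mu.Unlock()
	if a.sent {
		return false	// somebody already acked; caller must not send
	}
	a.sent = true
	return true			// caller owns the one and only ack
}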
/*
	Looks for the named reservation and deletes it if found. The cookie must be
	either the super cookie, or the cookie that the user supplied when the
	reservation was created. Deletion is effected by resetting the expiry time
	on the pledge to now + a few seconds. This will cause a new set of
	flow-mods to be sent out with an expiry time that will take them out post
	haste and without the need to send "delete" flow-mods out.

	This function sends a request to the network manager to delete the related
	queues. This must be done here so as to prevent any issues with the loosely
	coupled management of reservation and queue settings. It is VERY IMPORTANT
	to delete the reservation from the network perspective BEFORE the expiry
	time is reset. If it is reset first then the network splits timeslices
	based on the new expiry and queues end up dangling.
*/
func (inv *Inventory) Del_res( name *string, cookie *string ) ( state error ) {

	gp, state := inv.Get_res( name, cookie )

	if gp != nil {
		rm_sheep.Baa( 2, "resgmgr: deleted reservation: %s", (*gp).To_str() )
		state = nil

		switch p := (*gp).(type) {
			case *gizmos.Pledge_mirror:
				p.Set_expiry( time.Now().Unix() )			// expire the mirror NOW
				p.Set_pushed()								// need this to force undo to occur

			case *gizmos.Pledge_bw, *gizmos.Pledge_bwow:	// network handles either type
				ch := make( chan *ipc.Chmsg )				// do not close -- senders close channels
				req := ipc.Mk_chmsg( )
				req.Send_req( nw_ch, ch, REQ_DEL, p, nil )	// delete from the network point of view
				req = <- ch									// wait for response from network
				state = req.State
				p.Set_expiry( time.Now().Unix() + 15 )		// set the expiry to 15s from now which will force it out
				(*gp).Reset_pushed()						// force push of flow-mods that reset the expiry

			case *gizmos.Pledge_pass:
				p.Set_expiry( time.Now().Unix() + 15 )		// set the expiry to 15s from now which will force it out
				(*gp).Reset_pushed()						// force push of flow-mods that reset the expiry
		}
	} else {
		if state == nil {
			gp, state = inv.Get_retry_res( name, cookie )	// see if it's in the retry cache and cookie was valid for it
			if gp != nil {
				// FIXME???? do we need to mark and continue to retry this, and after it passes vetting let it delete by pushing out
				// short term flow-mods? This would cover the case where the flow-mods were pushed, but when tegu restarted ostack
				// didn't have enough info to vet the pledge, and thus the existing flow-mods do need to be reset on the physical host.
				delete( inv.retry, *name )					// for pledges on the retry cache, they can just be deleted since no flow-mods exist etc
			}
		} else {
			rm_sheep.Baa( 2, "resgmgr: unable to delete reservation: not found: %s", *name )
		}
	}

	return
}
/* * Push a "delete mirror" request out to an agent in order to remove the mirror. */ func undo_mirror_reservation(gp *gizmos.Pledge, rname string, ch chan *ipc.Chmsg) { p, ok := (*gp).(*gizmos.Pledge_mirror) // better be a mirroring pledge if !ok { rm_sheep.Baa(1, "internal error: pledge passed to undo_mirror_reservations wasn't a mirror pledge") (*gp).Set_pushed() // prevent looping until it expires return } id := p.Get_id() host := p.Get_qid() rm_sheep.Baa(1, "Deleting mirror %s on host %s", *id, *host) json := `{ "ctype": "action_list", "actions": [ { ` json += `"atype": "mirrorwiz", ` json += fmt.Sprintf(`"hosts": [ %q ], `, *host) json += fmt.Sprintf(`"qdata": [ "del", %q ] `, *id) json += `} ] }` rm_sheep.Baa(2, " JSON -> %s", json) msg := ipc.Mk_chmsg() msg.Send_req(am_ch, nil, REQ_SENDSHORT, json, nil) // send this as a short request to one agent p.Set_pushed() }
/*
	Pulls the reservation from the inventory. Similar to delete, but not quite
	the same. This will clone the pledge. The clone is expired and left in the
	inventory to force a reset of flowmods. The network manager is sent a
	request to delete the queues associated with the path, and the path is
	removed from the original pledge. The original pledge is returned so that
	it can be used to generate a new set of paths based on the hosts, expiry
	and bandwidth requirements of the initial reservation.

	Unlike the get/del functions, this is meant for internal support and does
	not require a cookie.

	It is important to delete the reservation from the network manager point of
	view BEFORE the expiry is reset. If expiry is set first then the network
	manager will cause queue timeslices to be split on that boundary leaving
	dangling queues.
*/
func (inv *Inventory) yank_res(name *string) (p *gizmos.Pledge, state error) {

	state = nil
	p = inv.cache[*name]
	if p != nil {
		switch pldg := (*p).(type) {
			case *gizmos.Pledge_bw:
				rm_sheep.Baa(2, "resgmgr: yanked reservation: %s", (*p).To_str())
				cp := pldg.Clone(*name + ".yank")		// clone but DO NOT set conclude time until after network delete!

				icp := gizmos.Pledge(cp)				// must convert to a pledge interface
				inv.cache[*name+".yank"] = &icp			// and then insert the address of the interface

				inv.cache[*name] = nil					// yank original from the list
				delete(inv.cache, *name)
				pldg.Set_path_list(nil)					// no path list for this pledge

				ch := make(chan *ipc.Chmsg)
				defer close(ch)							// close it on return
				req := ipc.Mk_chmsg()
				req.Send_req(nw_ch, ch, REQ_DEL, cp, nil)	// delete from the network point of view
				req = <-ch								// wait for response from network
				state = req.State

				// now safe to set these
				cp.Set_expiry(time.Now().Unix() + 1)	// force clone to be expired
				cp.Reset_pushed()						// force it to go out again

			// not supported for other pledge types
		}
	} else {
		state = fmt.Errorf("no reservation with name: %s", *name)
		rm_sheep.Baa(2, "resgmgr: unable to yank, no reservation with name: %s", *name)
	}

	return
}
/*
 *	Parse and react to a POST to /tegu/mirrors/. We expect JSON describing the mirror request, to wit:
 *		{
 *			"start_time": "nnn",                // optional
 *			"end_time": "nnn",                  // required
 *			"output": "<output spec>",          // required
 *			"port": [ "port1" , "port2", ...],  // required
 *			"vlan": "vlan",                     // optional
 *			"cookie": "value",                  // optional
 *			"name": "mirrorname",               // optional
 *		}
 *
 *	Because multiple mirrors may be created as a result, we return an array of JSON results, one for each mirror:
 *		[
 *			{
 *				"name": "mirrorname",   // tegu or user-defined mirror name
 *				"url": "url",           // URL to use for DELETE or GET
 *				"error": "err"          // error message (if any)
 *			},
 *			....
 *		]
 */
func mirror_post(in *http.Request, out http.ResponseWriter, data []byte) (code int, msg string) {
	http_sheep.Baa(5, "Request data: "+string(data))
	code = http.StatusOK

	// 1. Unmarshal the JSON request, check for required fields
	type req_type struct {
		Start_time string   `json:"start_time"`
		End_time   string   `json:"end_time"`	// required
		Output     string   `json:"output"`		// required
		Port       []string `json:"port"`		// required
		Vlan       string   `json:"vlan"`
		Cookie     string   `json:"cookie"`
		Name       string   `json:"name"`
	}
	var req req_type
	if err := json.Unmarshal(data, &req); err != nil {
		code = http.StatusBadRequest
		msg = "Bad JSON: " + err.Error()
		return
	}
	if req.End_time == "" || req.Output == "" || len(req.Port) == 0 {
		code = http.StatusBadRequest
		msg = "Missing a required field."
		return
	}

	// 2. Check start/end times, and VLAN list
	stime, etime, err := checkTimes(req.Start_time, req.End_time)
	if err != nil {
		code = http.StatusBadRequest
		msg = err.Error()
		return
	}
	err = validVlanList(req.Vlan)
	if err != nil {
		code = http.StatusBadRequest
		msg = err.Error()
		return
	}

	// 3. Generate random name if not given
	if req.Name == "" {
		req.Name = generateMirrorName()
	} else if !validName(req.Name) {
		code = http.StatusBadRequest
		msg = "Invalid mirror name: " + req.Name
		return
	}

	// 4. Validate input ports, and assign into groups
	plist, err := validatePorts(req.Port, req.Name)
	if err != nil {		// no valid ports, give up
		code = http.StatusBadRequest
		msg = err.Error()
		return
	}

	// 5. Validate output port
	newport, err := validateOutputPort(&req.Output)
	if err != nil {
		code = http.StatusBadRequest
		msg = err.Error()
		return
	}
	req.Output = *newport

	// 6. Make one pledge per mirror, send to reservation mgr, build JSON return string
	scheme := "http"
	if isSSL {
		scheme = "https"
	}
	code = http.StatusCreated
	sep := "\n"
	bs := bytes.NewBufferString("[")
	for key, mirror := range *plist {
		if key != "_badports_" {
			// Make a pledge
			phost := key
			nam := mirror.name
			res, err := gizmos.Mk_mirror_pledge(mirror.ports, &req.Output, stime, etime, &nam, &req.Cookie, &phost, &req.Vlan)
			if res != nil {
				req := ipc.Mk_chmsg()
				my_ch := make(chan *ipc.Chmsg)	// allocate channel for responses to our requests
				defer close(my_ch)				// close it on return

				gp := gizmos.Pledge(res)		// convert to generic pledge to pass
				req.Send_req(rmgr_ch, my_ch, REQ_DUPCHECK, &gp, nil)	// see if we have a duplicate in the cache
				req = <-my_ch					// get response from the network thread
				if req.Response_data != nil && req.Response_data.(*string) != nil {	// response is a pointer to string; if the pointer isn't nil it's a dup
					rp := req.Response_data.(*string)
					if rp != nil {
						http_sheep.Baa(1, "duplicate mirror reservation was dropped")
						err = fmt.Errorf("reservation duplicates existing reservation: %s", *rp)
					}
				} else {
					req = ipc.Mk_chmsg()
					ip := gizmos.Pledge(res)	// must pass an interface pointer to resmgr
					req.Send_req(rmgr_ch, my_ch, REQ_ADD, &ip, nil)	// network OK'd it, so add it to the inventory
					req = <-my_ch				// wait for completion

					if req.State == nil {
						ckptreq := ipc.Mk_chmsg()
						ckptreq.Send_req(rmgr_ch, nil, REQ_CHKPT, nil, nil)	// request a chkpt now, but don't wait on it
					} else {
						err = fmt.Errorf("%s", req.State)
					}
				}

				if res_paused {
					http_sheep.Baa(1, "reservations are paused, accepted reservation will not be pushed until resumed")
					res.Pause(false)	// when paused we must mark the reservation as paused and pushed so it doesn't push until resume received
					res.Set_pushed()
				}
			} else {
				if err == nil {
					err = fmt.Errorf("specific reason unknown")	// ensure we have something for message
				}
			}
			mirror.err = err
		}

		bs.WriteString(fmt.Sprintf(`%s { "name": "%s", `, sep, mirror.name))
		bs.WriteString(`"port": [ `)
		sep2 := ""
		for _, p := range mirror.ports {
			bs.WriteString(fmt.Sprintf(`%s"%s"`, sep2, p))
			sep2 = ", "
		}
		bs.WriteString(` ], `)
		if mirror.err == nil {
			bs.WriteString(fmt.Sprintf(`"url": "%s://%s/tegu/mirrors/%s/"`, scheme, in.Host, mirror.name))
		} else {
			bs.WriteString(fmt.Sprintf(`"error": "%s"`, mirror.err.Error()))
		}
		bs.WriteString(" }")
		sep = ",\n"
	}
	bs.WriteString("\n]\n")
	msg = bs.String()
	return
}
/*
	Given a pledge, vet it. Called during checkpoint load, or when running the
	retry queue. Returns a disposition state:
		DS_ADD     - Add pledge to reservation cache
		DS_RETRY   - Add to retry queue (recoverable error)
		DS_DISCARD - Discard it; error but not recoverable
*/
func vet_pledge(p *gizmos.Pledge) (disposition int) {
	var (
		my_ch chan *ipc.Chmsg
	)

	if p == nil {
		return DS_DISCARD
	}

	if (*p).Is_expired() {
		rm_sheep.Baa(1, "resmgr: ckpt_load: ignored expired pledge: %s", (*p).String())
		return DS_DISCARD
	} else {
		switch sp := (*p).(type) {		// work on specific pledge type, but pass the Pledge interface to add()
			case *gizmos.Pledge_mirror:
				//err = i.Add_res( p )	// assume we can just add it back in as is

			case *gizmos.Pledge_steer:
				rm_sheep.Baa(0, "did not restore steering reservation from checkpoint; not implemented")
				return DS_DISCARD

			case *gizmos.Pledge_bwow:
				h1, h2 := sp.Get_hosts()					// get the host names, fetch ostack data and update graph
				push_block := h2 == nil
				update_graph(h1, push_block, push_block)	// dig h1 info; push to netmgr if h2 isn't known and block on response
				if h2 != nil {
					update_graph(h2, true, true)			// dig h2 data and push to netmgr blocking for a netmgr response
				}

				my_ch = make(chan *ipc.Chmsg)
				req := ipc.Mk_chmsg()						// now safe to ask netmgr to validate the oneway pledge
				req.Send_req(nw_ch, my_ch, REQ_BWOW_RESERVE, sp, nil)
				req = <-my_ch								// should be OK, but the underlying network could have changed

				if req.Response_data != nil {
					gate := req.Response_data.(*gizmos.Gate)	// expect that network sent us a gate
					sp.Set_gate(gate)
					gate_ip := "nil"
					gipp := gate.Get_extip()
					if gipp != nil {
						gate_ip = *gipp
					}
					h2s := "nil"
					if h2 != nil {
						h2s = *h2
					}
					rm_sheep.Baa(1, "gate allocated for oneway reservation: %s h1=%s h2=%s gate_ip=%s", *(sp.Get_id()), *h1, h2s, gate_ip)
					//err = i.Add_res( p )
				} else {
					rm_sheep.Baa(0, "WRN: pledge_vet: unable to reserve for oneway pledge: %s  [TGURMG000]", (*p).To_str())
					return DS_RETRY
				}

			case *gizmos.Pledge_bw:
				h1, h2 := sp.Get_hosts()		// get the host names, fetch ostack data and update graph
				update_graph(h1, false, false)	// don't need to block on this one, nor update fqmgr
				update_graph(h2, true, true)	// wait for netmgr to update graph and then push related data to fqmgr

				my_ch = make(chan *ipc.Chmsg)
				req := ipc.Mk_chmsg()			// now safe to ask netmgr to find a path for the pledge
				rm_sheep.Baa(2, "reserving path starts")
				req.Send_req(nw_ch, my_ch, REQ_BW_RESERVE, sp, nil)
				req = <-my_ch					// should be OK, but the underlying network could have changed

				if req.Response_data != nil {
					rm_sheep.Baa(2, "reserving path finished")
					path_list := req.Response_data.([]*gizmos.Path)	// path(s) that were found to be suitable for the reservation
					sp.Set_path_list(path_list)
					rm_sheep.Baa(1, "path allocated for chkptd reservation: %s %s %s; path length= %d", *(sp.Get_id()), *h1, *h2, len(path_list))
					//err = i.Add_res( p )
				} else {
					rm_sheep.Baa(0, "WRN: pledge_vet: unable to reserve for pledge: %s  [TGURMG000]", (*p).To_str())
					return DS_RETRY
				}

			case *gizmos.Pledge_pass:
				host, _ := sp.Get_hosts()
				update_graph(host, true, true)
				my_ch = make(chan *ipc.Chmsg)
				req := ipc.Mk_chmsg()			// now safe to ask netmgr for the phost
				req.Send_req(nw_ch, my_ch, REQ_GETPHOST, host, nil)	// need to find the current phost for the vm
				req = <-my_ch

				if req.Response_data != nil {
					phost := req.Response_data.(*string)
					sp.Set_phost(phost)
					rm_sheep.Baa(1, "passthrough phost found for chkptd reservation: %s %s %s", *(sp.Get_id()), *host, *phost)
					//err = i.Add_res( p )
				} else {
					s := fmt.Errorf("unknown reason")
					if req.State != nil {
						s = req.State
					}
					rm_sheep.Baa(0, "WRN: pledge_vet: unable to find phost for passthru pledge: %s  [TGURMG000]", s)
					rm_sheep.Baa(0, "erroring passthru pledge: %s", (*p).To_str())
					return DS_RETRY
				}

			default:
				rm_sheep.Baa(0, "rmgr/vet_pledge: unrecognised pledge type")
				return DS_DISCARD
		}	// end switch on specific pledge type
	}

	return DS_ADD
}
/*
	Executes as a goroutine to drive the reservation manager portion of tegu.
*/
func Res_manager( my_chan chan *ipc.Chmsg, cookie *string ) {

	var (
		inv *Inventory
		msg *ipc.Chmsg
		ckptd string
		last_qcheck int64 = 0			// time that the last queue check was made to set window
		last_chkpt int64 = 0			// time that the last checkpoint was written
		retry_chkpt bool = false		// checkpoint needs to be retried because of a timing issue
		queue_gen_type = REQ_GEN_EPQMAP
		alt_table = DEF_ALT_TABLE		// table number where meta marking happens
		all_sys_up bool = false			// set when we receive the all_up message; some functions (chkpt) must wait for this
		hto_limit int = 3600 * 18		// OVS has a size limit to the hard timeout value, this caps it just under the OVS limit
		res_refresh int64 = 0			// next time when we must force all reservations to refresh flow-mods (hto_limit nonzero)
		rr_rate int = 3600				// refresh rate (1 hour)
		favour_v6 bool = true			// favour ipv6 addresses if a host has both defined
	)

	super_cookie = cookie				// global for all methods

	rm_sheep = bleater.Mk_bleater( 0, os.Stderr )	// allocate our bleater and attach it to the master
	rm_sheep.Set_prefix( "res_mgr" )
	tegu_sheep.Add_child( rm_sheep )				// we become a child so that if the master vol is adjusted we'll react too

	p := cfg_data["default"]["queue_type"]			// lives in default b/c used by fq-mgr too
	if p != nil {
		if *p == "endpoint" {
			queue_gen_type = REQ_GEN_EPQMAP
		} else {
			queue_gen_type = REQ_GEN_QMAP
		}
	}

	p = cfg_data["default"]["alttable"]				// alt table for meta marking
	if p != nil {
		alt_table = clike.Atoi( *p )
	}

	p = cfg_data["default"]["favour_ipv6"]
	if p != nil {
		favour_v6 = *p == "true"
	}

	if cfg_data["resmgr"] != nil {
		cdp := cfg_data["resmgr"]["chkpt_dir"]
		if cdp == nil {
			ckptd = "/var/lib/tegu/resmgr"			// default directory and prefix
		} else {
			ckptd = *cdp + "/resmgr"				// add prefix to directory in config
		}

		p = cfg_data["resmgr"]["verbose"]
		if p != nil {
			rm_sheep.Set_level( uint( clike.Atoi( *p ) ) )
		}

		/*
		p = cfg_data["resmgr"]["set_vlan"]
		if p != nil {
			set_vlan = *p == "true"
		}
		*/

		p = cfg_data["resmgr"]["super_cookie"]
		if p != nil {
			super_cookie = p
			rm_sheep.Baa( 1, "super-cookie was set from config file" )
		}

		p = cfg_data["resmgr"]["hto_limit"]			// if OVS or whatever has a max timeout we can ensure it's not surpassed
		if p != nil {
			hto_limit = clike.Atoi( *p )
		}

		p = cfg_data["resmgr"]["res_refresh"]		// rate that reservations are refreshed if hto_limit is non-zero
		if p != nil {
			rr_rate = clike.Atoi( *p )
			if rr_rate < 900 {
				if rr_rate < 120 {
					rm_sheep.Baa( 0, "NOTICE: reservation refresh rate in config is insanely low (%ds) and was changed to 1800s", rr_rate )
					rr_rate = 1800
				} else {
					rm_sheep.Baa( 0, "NOTICE: reservation refresh rate in config is too low: %ds", rr_rate )
				}
			}
		}
	}

	send_meta_counter := 200						// send meta f-mods only now and again
	rm_sheep.Baa( 1, "ovs table number %d used for metadata marking", alt_table )

	res_refresh = time.Now().Unix() + int64( rr_rate )	// set first refresh in an hour (ignored if hto_limit not set)
	inv = Mk_inventory( )
	inv.chkpt = chkpt.Mk_chkpt( ckptd, 10, 90 )

	last_qcheck = time.Now().Unix()

	tkl_ch := make( chan *ipc.Chmsg, 5 )							// special, short buffer, channel for tickles allows 5 to queue before blocking sender
	tklr.Add_spot( 2, tkl_ch, REQ_PUSH, nil, ipc.FOREVER )			// push reservations to agent just before they go live
	tklr.Add_spot( 1, tkl_ch, REQ_SETQUEUES, nil, ipc.FOREVER )		// drives us to see if queues need to be adjusted
	tklr.Add_spot( 5, tkl_ch, REQ_RTRY_CHKPT, nil, ipc.FOREVER )	// ensures that we retried any missed checkpoints
	tklr.Add_spot( 60, tkl_ch, REQ_VET_RETRY, nil, ipc.FOREVER )	// run the retry queue if it has size

	go rm_lookup( rmgrlu_ch, inv )

	rm_sheep.Baa( 3, "res_mgr is running  %x", my_chan )
	for {
		select {									// select next ready message on either channel
			case msg = <- tkl_ch:					// msg available on tickle channel
				msg.State = nil						// nil state is OK, no error
				my_chan <- msg						// just pass it through; tkl_ch has a small buffer (blocks quickly) and this prevents filling the main queue w/ tickles if we get busy

			case msg = <- my_chan:					// process message from the main channel
				rm_sheep.Baa( 3, "processing message: %d", msg.Msg_type )
				switch msg.Msg_type {
					case REQ_NOOP:					// just ignore

					case REQ_ADD:
						msg.State = inv.Add_res( msg.Req_data )		// add will determine the pledge type and do the right thing
						msg.Response_data = nil

					case REQ_ALLUP:					// signals that all initialisation is complete (chkpting etc. can go)
						all_sys_up = true
						// periodic checkpointing turned off with the introduction of tegu_ha
						//tklr.Add_spot( 180, my_chan, REQ_CHKPT, nil, ipc.FOREVER )	// tickle spot to drive us every 180 seconds to checkpoint

					case REQ_RTRY_CHKPT:			// called to attempt to send a queued checkpoint request
						if all_sys_up {
							if retry_chkpt {
								rm_sheep.Baa( 3, "invoking checkpoint (retry)" )
								retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )
							}
						}

					case REQ_CHKPT:					// external thread has requested checkpoint
						if all_sys_up {
							rm_sheep.Baa( 3, "invoking checkpoint" )
							retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )
						}

					case REQ_DEL:					// user initiated delete -- requires cookie
						data := msg.Req_data.( []*string )			// assume pointers to name and cookie
						if data[0] != nil && *data[0] == "all" {
							inv.Del_all_res( data[1] )
							msg.State = nil
						} else {
							msg.State = inv.Del_res( data[0], data[1] )
						}

						inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )	// must force a push to push augmented (shortened) reservations
						msg.Response_data = nil

					case REQ_DUPCHECK:
						if msg.Req_data != nil {
							msg.Response_data, msg.State = inv.dup_check( msg.Req_data.( *gizmos.Pledge ) )
						}

					case REQ_GET:					// user initiated get -- requires cookie
						data := msg.Req_data.( []*string )			// assume pointers to name and cookie
						msg.Response_data, msg.State = inv.Get_res( data[0], data[1] )

					case REQ_LIST:					// list reservations (for a client)
						msg.Response_data, msg.State = inv.res2json( )

					case REQ_LOAD:					// load from a checkpoint file
						data := msg.Req_data.( *string )			// assume pointer to file name
						msg.State = inv.load_chkpt( data )
						msg.Response_data = nil
						rm_sheep.Baa( 1, "checkpoint file loaded" )

					case REQ_PAUSE:
						msg.State = nil				// right now this cannot fail in ways we know about
						msg.Response_data = ""
						inv.pause_on()
						res_refresh = 0				// must force a push of everything on next push tickle
						rm_sheep.Baa( 1, "pausing..." )

					case REQ_RESUME:
						msg.State = nil				// right now this cannot fail in ways we know about
						msg.Response_data = ""
						res_refresh = 0				// must force a push of everything on next push tickle
						inv.pause_off()

					case REQ_SETQUEUES:				// driven about every second to reset the queues if a reservation state has changed
						now := time.Now().Unix()
						if (now > last_qcheck && inv.any_concluded( now - last_qcheck )) || inv.any_commencing( now - last_qcheck, 0 ) {
							rm_sheep.Baa( 1, "channel states: rm=%d rmlu=%d fq=%d net=%d agent=%d", len( rmgr_ch ), len( rmgrlu_ch ), len( fq_ch ), len( nw_ch ), len( am_ch ) )
							rm_sheep.Baa( 1, "reservation state change detected, requesting queue map from net-mgr" )
							tmsg := ipc.Mk_chmsg( )
							tmsg.Send_req( nw_ch, my_chan, queue_gen_type, time.Now().Unix(), nil )	// get a queue map; when it arrives we'll push to fqmgr and trigger flow-mod push
						}
						last_qcheck = now

					case REQ_PUSH:					// driven every few seconds to check for need to refresh because of switch max timeout setting
						if hto_limit > 0 {			// if reservation flow-mods are capped with a hard timeout limit
							now := time.Now().Unix()
							if now > res_refresh {
								rm_sheep.Baa( 2, "refreshing all reservations" )
								inv.reset_push()						// reset pushed flag on all reservations to cause active ones to be pushed again
								res_refresh = now + int64( rr_rate )	// push everything again in an hour

								inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )	// force a push of all
							}
						}

					case REQ_PLEDGE_LIST:			// generate a list of pledges that are related to the given VM
						msg.Response_data, msg.State = inv.pledge_list( msg.Req_data.( *string ) )

					case REQ_SETULCAP:				// user link capacity; expect array of two string pointers (name and value)
						data := msg.Req_data.( []*string )
						inv.add_ulcap( data[0], data[1] )
						retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )

					// CAUTION: the requests below come back as asynch responses rather than as initial message

					case REQ_IE_RESERVE:			// an IE reservation failed
						msg.Response_ch = nil		// immediately disable to prevent loop
						inv.failed_push( msg )		// suss out the pledge and mark it unpushed

					case REQ_GEN_QMAP:				// response carries the queue map that now should be sent to fq-mgr to drive a queue update
						fallthrough

					case REQ_GEN_EPQMAP:
						rm_sheep.Baa( 1, "received queue map from network manager" )

						qlist := msg.Response_data.( []string )		// get the queue list for our use first
						if send_meta_counter >= 200 {
							send_meta_fmods( qlist, alt_table )		// push meta rules
							send_meta_counter = 0
						} else {
							send_meta_counter++
						}

						msg.Response_ch = nil						// immediately disable to prevent loop
						fq_data := make( []interface{}, 1 )
						fq_data[FQ_QLIST] = msg.Response_data
						tmsg := ipc.Mk_chmsg( )
						tmsg.Send_req( fq_ch, nil, REQ_SETQUEUES, fq_data, nil )	// send the queue list to fq manager to deal with

						inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )	// now safe to push reservations if any activated

					case REQ_VET_RETRY:
						if inv != nil && len( inv.retry ) > 0 {
							inv.vet_retries( )
						}

					case REQ_YANK_RES:				// yank a reservation from the inventory returning the pledge and allowing flow-mods to purge
						if msg.Response_ch != nil {
							msg.Response_data, msg.State = inv.yank_res( msg.Req_data.( *string ) )
						}

					/* deprecated -- moved to rm_lookup
					case REQ_GET_MIRRORS:			// user initiated get list of mirrors
						t := inv.Get_mirrorlist()
						msg.Response_data = &t
					*/

					default:
						rm_sheep.Baa( 0, "WRN: res_mgr: unknown message: %d  [TGURMG001]", msg.Msg_type )
						msg.Response_data = nil
						msg.State = fmt.Errorf( "res_mgr: unknown message (%d)", msg.Msg_type )
						msg.Response_ch = nil		// we don't respond to these
				}	// end main channel case
		}	// end select

		rm_sheep.Baa( 3, "processing message complete: %d", msg.Msg_type )
		if msg.Response_ch != nil {					// if a response channel was provided
			msg.Response_ch <- msg					// send our result back to the requester
		}
	}
}
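/*
	The tickle-channel wiring above is worth seeing without the surrounding
	bulk: timers write small typed messages on a short-buffered channel and the
	main loop forwards them to itself, so a busy main queue throttles the
	tickler rather than being flooded. A minimal sketch (REQ_EXAMPLE is a
	hypothetical message type):
*/
func tickle_wiring_sketch(my_chan chan *ipc.Chmsg) {
	tkl_ch := make(chan *ipc.Chmsg, 5)	// short buffer: blocks the tickler, not the main loop

	tklr.Add_spot(2, tkl_ch, REQ_EXAMPLE, nil, ipc.FOREVER)	// fire every 2 seconds, forever

	go func() {
		for msg := range tkl_ch {
			msg.State = nil		// tickles are always "OK"
			my_chan <- msg		// forward into the main work queue
		}
	}()
}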
func TestIpc(t *testing.T) {
	req := ipc.Mk_chmsg()
	if req == nil {
		fmt.Fprintf(os.Stderr, "unable to create a request\n")
		t.Fail()
		return
	}

	start_ts := time.Now().Unix()
	fmt.Fprintf(os.Stderr, "this test runs for 60 seconds and will generate updates periodically....\n")
	req.Response_ch = nil				// use it to keep compiler happy

	data := "data for tickled bit"
	ch := make(chan *ipc.Chmsg, 10)		// allow 10 messages to queue on the channel
										// to test the non-blocking aspect, set to 1; test should run longer than 60 seconds

	tklr := ipc.Mk_tickler(6)						// allow max of 6 ticklers; we test 'full' error at end
	tklr.Add_spot(35, ch, 30, &data, 0)				// will automatically start the tickler
	tklr.Add_spot(20, ch, 20, &data, 0)
	tklr.Add_spot(15, ch, 15, &data, 0)
	id, err := tklr.Add_spot(10, ch, 10, &data, 0)	// nick the id so we can drop it later
	_, err = tklr.Add_spot(10, ch, 1, &data, 2)		// should drive type 1 only twice; 10s apart
	if err != nil {
		fmt.Fprintf(os.Stderr, "unable to add tickle spot: %s\n", err)
		t.Fail()
		return
	}

	fmt.Fprintf(os.Stderr, "type 10 and type 1 written every 10 seconds; type 1 only written twice\n")
	fmt.Fprintf(os.Stderr, "type 10 will be dropped after 35 seconds\n")
	fmt.Fprintf(os.Stderr, "type 15 will appear every 15 seconds\n")
	fmt.Fprintf(os.Stderr, "type 20 will appear every 20 seconds\n")
	fmt.Fprintf(os.Stderr, "type 30 will appear every 35 seconds\n")

	limited_count := 0
	for count := 0; count < 2; {
		req = <-ch	// wait for tickle
		fmt.Fprintf(os.Stderr, "got a tickle: @%ds type=%d count=%d\n", time.Now().Unix()-start_ts, req.Msg_type, count)
		if req.Msg_type == 30 {
			if count == 0 {
				fmt.Fprintf(os.Stderr, "dropping type 10 from list; no more type 10 should appear\n")
				tklr.Drop_spot(id)	// drop the 10s tickler after the first 30 second one pops
			}
			count++					// count updated only at 30s point
		}

		if req.Msg_type == 1 {
			if limited_count > 1 {
				fmt.Fprintf(os.Stderr, "limited count tickle was driven more than twice  [FAIL]\n")
				t.Fail()
			}
			limited_count++
		}

		if req.Msg_type == 10 && count > 0 {
			fmt.Fprintf(os.Stderr, "req 10 driven after it was dropped  [FAIL]\n")
			t.Fail()
		}
	}

	tklr.Stop()

	// when we get here there should only be three active ticklers in the list, so we should be
	// able to add 3 before we get a full error.
	// add more spots until we max out to test the error logic in tickle
	err = nil
	for i := 0; i < 3 && err == nil; i++ {
		_, err = tklr.Add_spot(20, ch, 20, &data, 0)
		if err != nil {
			fmt.Fprintf(os.Stderr, "early failure when adding more: i=%d %s\n", i, err)
			t.Fail()
			return
		}
	}

	// the table should be full (6 active ticklers now) and this should return an error
	_, err = tklr.Add_spot(10, ch, 10, &data, 0)
	if err != nil {
		fmt.Fprintf(os.Stderr, "test to over fill the table resulted in the expected error: %s  [OK]\n", err)
	} else {
		fmt.Fprintf(os.Stderr, "adding a 7th tickle spot didn't cause an error and should have  [FAIL]\n")
		t.Fail()
	}
}
func add_nbmsg(ch chan *ipc.Chmsg, mt int) {
	r := ipc.Mk_chmsg()
	fmt.Fprintf(os.Stderr, "\tsending message type %d\n", mt)
	r.Send_nbreq(ch, nil, mt, nil, nil)
	fmt.Fprintf(os.Stderr, "\tsent message type %d\n", mt)
}