/*
	Executes as a goroutine to drive the reservation manager portion of tegu.
*/
func Res_manager( my_chan chan *ipc.Chmsg, cookie *string ) {

	var (
		inv		*Inventory
		msg		*ipc.Chmsg
		ckptd	string
		last_qcheck	int64 = 0				// time that the last queue check was made to set window
		last_chkpt	int64 = 0				// time that the last checkpoint was written
		retry_chkpt	bool = false			// checkpoint needs to be retried because of a timing issue
		queue_gen_type = REQ_GEN_EPQMAP
		alt_table = DEF_ALT_TABLE			// table number where meta marking happens
		all_sys_up	bool = false			// set when we receive the all_up message; some functions (chkpt) must wait for this
		hto_limit	int = 3600 * 18			// OVS has a size limit to the hard timeout value; this caps it just under the OVS limit
		res_refresh	int64 = 0				// next time when we must force all reservations to refresh flow-mods (hto_limit nonzero)
		rr_rate		int = 3600				// refresh rate (1 hour)
		favour_v6	bool = true				// favour ipv6 addresses if a host has both defined
	)

	super_cookie = cookie					// global for all methods

	rm_sheep = bleater.Mk_bleater( 0, os.Stderr )		// allocate our bleater and attach it to the master
	rm_sheep.Set_prefix( "res_mgr" )
	tegu_sheep.Add_child( rm_sheep )					// we become a child so that if the master vol is adjusted we'll react too

	p := cfg_data["default"]["queue_type"]				// lives in default b/c used by fq-mgr too
	if p != nil {
		if *p == "endpoint" {
			queue_gen_type = REQ_GEN_EPQMAP
		} else {
			queue_gen_type = REQ_GEN_QMAP
		}
	}

	p = cfg_data["default"]["alttable"]					// alt table for meta marking
	if p != nil {
		alt_table = clike.Atoi( *p )
	}

	p = cfg_data["default"]["favour_ipv6"]
	if p != nil {
		favour_v6 = *p == "true"
	}

	if cfg_data["resmgr"] != nil {
		cdp := cfg_data["resmgr"]["chkpt_dir"]
		if cdp == nil {
			ckptd = "/var/lib/tegu/resmgr"				// default directory and prefix
		} else {
			ckptd = *cdp + "/resmgr"					// add prefix to directory in config
		}

		p = cfg_data["resmgr"]["verbose"]
		if p != nil {
			rm_sheep.Set_level( uint( clike.Atoi( *p ) ) )
		}

		/*
		p = cfg_data["resmgr"]["set_vlan"]
		if p != nil {
			set_vlan = *p == "true"
		}
		*/

		p = cfg_data["resmgr"]["super_cookie"]
		if p != nil {
			super_cookie = p
			rm_sheep.Baa( 1, "super-cookie was set from config file" )
		}

		p = cfg_data["resmgr"]["hto_limit"]				// if OVS or whatever has a max timeout we can ensure it's not surpassed
		if p != nil {
			hto_limit = clike.Atoi( *p )
		}

		p = cfg_data["resmgr"]["res_refresh"]			// rate that reservations are refreshed if hto_limit is non-zero
		if p != nil {
			rr_rate = clike.Atoi( *p )
			if rr_rate < 900 {
				if rr_rate < 120 {
					rm_sheep.Baa( 0, "NOTICE: reservation refresh rate in config is insanely low (%ds) and was changed to 1800s", rr_rate )
					rr_rate = 1800
				} else {
					rm_sheep.Baa( 0, "NOTICE: reservation refresh rate in config is too low: %ds", rr_rate )
				}
			}
		}
	}

	send_meta_counter := 200							// send meta f-mods only now and again
	rm_sheep.Baa( 1, "ovs table number %d used for metadata marking", alt_table )

	res_refresh = time.Now().Unix() + int64( rr_rate )	// set first refresh in an hour (ignored if hto_limit not set)

	inv = Mk_inventory( )
	inv.chkpt = chkpt.Mk_chkpt( ckptd, 10, 90 )

	last_qcheck = time.Now().Unix()

	tkl_ch := make( chan *ipc.Chmsg, 5 )							// special, short buffer, channel for tickles; allows 5 to queue before blocking sender
	tklr.Add_spot( 2, tkl_ch, REQ_PUSH, nil, ipc.FOREVER )			// push reservations to agent just before they go live
	tklr.Add_spot( 1, tkl_ch, REQ_SETQUEUES, nil, ipc.FOREVER )		// drives us to see if queues need to be adjusted
	tklr.Add_spot( 5, tkl_ch, REQ_RTRY_CHKPT, nil, ipc.FOREVER )	// ensures that we retry any missed checkpoints
	tklr.Add_spot( 60, tkl_ch, REQ_VET_RETRY, nil, ipc.FOREVER )	// run the retry queue if it has size

	go rm_lookup( rmgrlu_ch, inv )

	rm_sheep.Baa( 3, "res_mgr is running %x", my_chan )
	for {
		select {									// select next ready message on either channel
		case msg = <- tkl_ch:						// msg available on tickle channel
			msg.State = nil							// nil state is OK, no error
			my_chan <- msg							// just pass it through; tkl_ch has a small buffer (blocks quickly) and this prevents filling the main queue with tickles if we get busy

		case msg = <- my_chan:						// process message from the main channel
			rm_sheep.Baa( 3, "processing message: %d", msg.Msg_type )
			switch msg.Msg_type {
			case REQ_NOOP:							// just ignore

			case REQ_ADD:
				msg.State = inv.Add_res( msg.Req_data )		// add will determine the pledge type and do the right thing
				msg.Response_data = nil

			case REQ_ALLUP:							// signals that all initialisation is complete (chkpting etc. can go)
				all_sys_up = true
				// periodic checkpointing turned off with the introduction of tegu_ha
				//tklr.Add_spot( 180, my_chan, REQ_CHKPT, nil, ipc.FOREVER )		// tickle spot to drive us every 180 seconds to checkpoint

			case REQ_RTRY_CHKPT:					// called to attempt to send a queued checkpoint request
				if all_sys_up {
					if retry_chkpt {
						rm_sheep.Baa( 3, "invoking checkpoint (retry)" )
						retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )
					}
				}

			case REQ_CHKPT:							// external thread has requested checkpoint
				if all_sys_up {
					rm_sheep.Baa( 3, "invoking checkpoint" )
					retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )
				}

			case REQ_DEL:							// user initiated delete -- requires cookie
				data := msg.Req_data.( []*string )			// assume pointers to name and cookie
				if data[0] != nil && *data[0] == "all" {
					inv.Del_all_res( data[1] )
					msg.State = nil
				} else {
					msg.State = inv.Del_res( data[0], data[1] )
				}
				inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )	// must force a push to push augmented (shortened) reservations
				msg.Response_data = nil

			case REQ_DUPCHECK:
				if msg.Req_data != nil {
					msg.Response_data, msg.State = inv.dup_check( msg.Req_data.( *gizmos.Pledge ) )
				}

			case REQ_GET:							// user initiated get -- requires cookie
				data := msg.Req_data.( []*string )			// assume pointers to name and cookie
				msg.Response_data, msg.State = inv.Get_res( data[0], data[1] )

			case REQ_LIST:							// list reservations (for a client)
				msg.Response_data, msg.State = inv.res2json( )

			case REQ_LOAD:							// load from a checkpoint file
				data := msg.Req_data.( *string )			// assume pointer to the checkpoint file name
				msg.State = inv.load_chkpt( data )
				msg.Response_data = nil
				rm_sheep.Baa( 1, "checkpoint file loaded" )

			case REQ_PAUSE:
				msg.State = nil							// right now this cannot fail in ways we know about
				msg.Response_data = ""
				inv.pause_on()
				res_refresh = 0							// must force a push of everything on next push tickle
				rm_sheep.Baa( 1, "pausing..." )
			case REQ_RESUME:
				msg.State = nil							// right now this cannot fail in ways we know about
				msg.Response_data = ""
				res_refresh = 0							// must force a push of everything on next push tickle
				inv.pause_off()

			case REQ_SETQUEUES:							// driven about every second to reset the queues if a reservation state has changed
				now := time.Now().Unix()
				if now > last_qcheck && inv.any_concluded( now - last_qcheck ) || inv.any_commencing( now - last_qcheck, 0 ) {
					rm_sheep.Baa( 1, "channel states: rm=%d rmlu=%d fq=%d net=%d agent=%d", len( rmgr_ch ), len( rmgrlu_ch ), len( fq_ch ), len( nw_ch ), len( am_ch ) )
					rm_sheep.Baa( 1, "reservation state change detected, requesting queue map from net-mgr" )
					tmsg := ipc.Mk_chmsg( )
					tmsg.Send_req( nw_ch, my_chan, queue_gen_type, time.Now().Unix(), nil )		// get a queue map; when it arrives we'll push to fqmgr and trigger flow-mod push
				}
				last_qcheck = now

			case REQ_PUSH:								// driven every few seconds to check for need to refresh because of switch max timeout setting
				if hto_limit > 0 {						// if reservation flow-mods are capped with a hard timeout limit
					now := time.Now().Unix()
					if now > res_refresh {
						rm_sheep.Baa( 2, "refreshing all reservations" )
						inv.reset_push()							// reset pushed flag on all reservations to cause active ones to be pushed again
						res_refresh = now + int64( rr_rate )		// push everything again in an hour
						inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )	// force a push of all
					}
				}

			case REQ_PLEDGE_LIST:						// generate a list of pledges that are related to the given VM
				msg.Response_data, msg.State = inv.pledge_list( msg.Req_data.( *string ) )

			case REQ_SETULCAP:							// user link capacity; expect array of two string pointers (name and value)
				data := msg.Req_data.( []*string )
				inv.add_ulcap( data[0], data[1] )
				retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )

			// CAUTION: the requests below come back as asynch responses rather than as initial messages

			case REQ_IE_RESERVE:						// an IE reservation failed
				msg.Response_ch = nil					// immediately disable to prevent loop
				inv.failed_push( msg )					// suss out the pledge and mark it unpushed

			case REQ_GEN_QMAP:							// response carries the queue map that now should be sent to fq-mgr to drive a queue update
				fallthrough

			case REQ_GEN_EPQMAP:
				rm_sheep.Baa( 1, "received queue map from network manager" )

				qlist := msg.Response_data.( []string )			// get the queue list for our use first
				if send_meta_counter >= 200 {
					send_meta_fmods( qlist, alt_table )			// push meta rules
					send_meta_counter = 0
				} else {
					send_meta_counter++
				}

				msg.Response_ch = nil							// immediately disable to prevent loop
				fq_data := make( []interface{}, 1 )
				fq_data[FQ_QLIST] = msg.Response_data
				tmsg := ipc.Mk_chmsg( )
				tmsg.Send_req( fq_ch, nil, REQ_SETQUEUES, fq_data, nil )		// send the queue list to fq manager to deal with

				inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )	// now safe to push reservations if any activated

			case REQ_VET_RETRY:
				if inv != nil && len( inv.retry ) > 0 {
					inv.vet_retries( )
				}

			case REQ_YANK_RES:							// yank a reservation from the inventory, returning the pledge and allowing flow-mods to purge
				if msg.Response_ch != nil {
					msg.Response_data, msg.State = inv.yank_res( msg.Req_data.( *string ) )
				}

			/* deprecated -- moved to rm_lookup
			case REQ_GET_MIRRORS:						// user initiated get list of mirrors
				t := inv.Get_mirrorlist()
				msg.Response_data = &t
			*/

			default:
				rm_sheep.Baa( 0, "WRN: res_mgr: unknown message: %d [TGURMG001]", msg.Msg_type )
				msg.Response_data = nil
				msg.State = fmt.Errorf( "res_mgr: unknown message (%d)", msg.Msg_type )
				msg.Response_ch = nil					// we don't respond to these
			}							// end main channel case
		}								// end select

		rm_sheep.Baa( 3, "processing message complete: %d", msg.Msg_type )
		if msg.Response_ch != nil {		// if a response channel was provided
			msg.Response_ch <- msg		// send our result back to the requester
		}
	}
}
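
/*
	Illustrative sketch (not part of the running code): how another tegu goroutine would
	typically ask res_mgr for something and wait for the answer. The request/response
	pattern mirrors the ipc.Mk_chmsg/Send_req calls used above; the string type assertion
	on Response_data is an assumption about what res2json() returns, not a guarantee.

		my_ch := make( chan *ipc.Chmsg )						// private channel for the reply
		req := ipc.Mk_chmsg( )
		req.Send_req( rmgr_ch, my_ch, REQ_LIST, nil, nil )		// ask res_mgr to list reservations
		req = <- my_ch											// block until res_mgr responds
		if req.State == nil {									// nil state means no error
			if jstr, ok := req.Response_data.( string ); ok {	// assumed to be a json string
				fmt.Printf( "%s\n", jstr )
			}
		}
*/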
/*
	The main goroutine that acts on messages sent to our channel. We expect messages from the
	reservation manager, and from a tickler that causes us to evaluate the need to resize ovs
	queues.

	DSCP values: DSCP values range from 0-64 decimal, but when described on or by flow-mods
	they are shifted two bits to the left (e.g. a decimal value of 46 appears in a flow-mod as
	46<<2 = 184). The send flow-mod function does the needed shifting, so all code outside of
	that one function should assume/use decimal values in the range of 0-64.
*/
func Fq_mgr(my_chan chan *ipc.Chmsg, sdn_host *string) {

	var (
		uri_prefix   string = ""
		msg          *ipc.Chmsg
		data         []interface{}          // generic list of data on some requests
		fdata        *Fq_req                // flow-mod request data
		qcheck_freq  int64 = 5
		hcheck_freq  int64 = 180
		host_list    *string                // current set of openstack real hosts
		ip2mac       map[string]*string     // translation from ip address to mac
		switch_hosts *string                // from config file; overrides openstack list if given (mostly testing)
		ssq_cmd      *string                // command string used to set switch queues (from config file)
		send_all     bool = false           // send all flow-mods; false means send just ingress/egress and not intermediate switch f-mods
		alt_table    int = DEF_ALT_TABLE    // meta data marking table
		phost_suffix *string = nil          // physical host suffix added to each host name in the list from openstack (config)

		//max_link_used int64 = 0           // the current maximum link utilisation
	)

	fq_sheep = bleater.Mk_bleater(0, os.Stderr)    // allocate our bleater and attach it to the master
	fq_sheep.Set_prefix("fq_mgr")
	tegu_sheep.Add_child(fq_sheep)                 // we become a child so that if the master vol is adjusted we'll react too

	// -------------- pick up config file data if there --------------------------------
	if *sdn_host == "" {                           // not supplied on command line, pull from config
		if sdn_host = cfg_data["default"]["sdn_host"]; sdn_host == nil {   // no default; when not in config, it's turned off and we send to agent
			sdn_host = &empty_str
		}
	}

	if cfg_data["default"]["queue_type"] != nil {
		if *cfg_data["default"]["queue_type"] == "endpoint" {
			send_all = false
		} else {
			send_all = true
		}
	}

	if p := cfg_data["default"]["alttable"]; p != nil {    // this is the base; we use alt_table to alt_table + (n-1) when we need more than 1 table
		alt_table = clike.Atoi(*p)
	}

	if cfg_data["fqmgr"] != nil {                          // pick up things in our specific section
		if dp := cfg_data["fqmgr"]["ssq_cmd"]; dp != nil { // set switch queue command
			ssq_cmd = dp
		}

		/*
		if p := cfg_data["fqmgr"]["default_dscp"]; p != nil {   // this is a single value and should not be confused with the dscp list in the default section of the config
			dscp = clike.Atoi( *p )
		}
		*/

		if p := cfg_data["fqmgr"]["queue_check"]; p != nil {    // queue check frequency from the control file
			qcheck_freq = clike.Atoi64(*p)
			if qcheck_freq < 5 {
				qcheck_freq = 5
			}
		}

		if p := cfg_data["fqmgr"]["host_check"]; p != nil {     // frequency of checking for new _real_ hosts from openstack
			hcheck_freq = clike.Atoi64(*p)
			if hcheck_freq < 30 {
				hcheck_freq = 30
			}
		}

		if p := cfg_data["fqmgr"]["switch_hosts"]; p != nil {
			switch_hosts = p
		}

		if p := cfg_data["fqmgr"]["verbose"]; p != nil {
			fq_sheep.Set_level(uint(clike.Atoi(*p)))
		}

		if p := cfg_data["fqmgr"]["phost_suffix"]; p != nil {   // suffix added to physical host strings for agent commands
			if *p != "" {
				phost_suffix = p
				fq_sheep.Baa(1, "physical host names will be suffixed with: %s", *phost_suffix)
			}
		}
	}
	// ----- end config file munging ---------------------------------------------------

	//tklr.Add_spot( qcheck_freq, my_chan, REQ_SETQUEUES, nil, ipc.FOREVER )    // tickle us every few seconds to adjust the ovs queues if needed

	if switch_hosts == nil {
		tklr.Add_spot(2, my_chan, REQ_CHOSTLIST, nil, 1)                       // tickle once, very soon after starting, to get a host list
		tklr.Add_spot(hcheck_freq, my_chan, REQ_CHOSTLIST, nil, ipc.FOREVER)   // tickle us every once in a while to update the host list
		fq_sheep.Baa(2, "host list will be requested from openstack every %ds", hcheck_freq)
	} else {
		host_list = switch_hosts
		fq_sheep.Baa(0, "static host list from config used for setting OVS queues: %s", *host_list)
	}

	if sdn_host != nil && *sdn_host != "" {
		uri_prefix = fmt.Sprintf("http://%s", *sdn_host)
	}

	fq_sheep.Baa(1, "flowmod-queue manager is running, sdn host: %s", *sdn_host)
	for {
		msg = <-my_chan         // wait for next message
		msg.State = nil         // default to all OK

		fq_sheep.Baa(3, "processing message: %d", msg.Msg_type)
		switch msg.Msg_type {
		case REQ_GEN_FMOD:      // generic fmod; just pass it along w/o any special handling
			if msg.Req_data != nil {
				fdata = msg.Req_data.(*Fq_req)    // pointer at struct with all of our expected goodies
				send_gfmod_agent(fdata, ip2mac, host_list, phost_suffix)
			}

		case REQ_BWOW_RESERVE:  // oneway bandwidth flow-mod generation
			msg.Response_ch = nil                 // nothing goes back from this
			fdata = msg.Req_data.(*Fq_req)        // pointer at struct with all of the expected goodies
			send_bwow_fmods(fdata, ip2mac, phost_suffix)

		case REQ_BW_RESERVE:    // bandwidth endpoint flow-mod creation; single agent script creates all needed fmods
			fdata = msg.Req_data.(*Fq_req)        // pointer at struct with all of the expected goodies
			send_bw_fmods(fdata, ip2mac, phost_suffix)
			msg.Response_ch = nil                 // nothing goes back from this

		case REQ_IE_RESERVE:    // proactive ingress/egress reservation flowmod (likely deprecated as of 3/21/2015 -- resmgr invokes the bw_fmods script via agent)
			fdata = msg.Req_data.(*Fq_req)        // user view of what the flow-mod should be

			if uri_prefix != "" {                 // an sdn controller -- skoogi -- is enabled
				msg.State = gizmos.SK_ie_flowmod(&uri_prefix, *fdata.Match.Ip1, *fdata.Match.Ip2, fdata.Expiry, fdata.Espq.Queuenum, fdata.Espq.Switch, fdata.Espq.Port)

				if msg.State == nil {             // no error, no response to requestor
					fq_sheep.Baa(2, "proactive reserve successfully sent: uri=%s h1=%s h2=%s exp=%d qnum=%d swid=%s port=%d", uri_prefix, fdata.Match.Ip1, fdata.Match.Ip2, fdata.Expiry, fdata.Espq.Queuenum, fdata.Espq.Switch, fdata.Espq.Port)
					msg.Response_ch = nil
				} else {
					// do we need to suss out the id and mark it failed, or set a timer on it, so as not to flood resmgr with errors?
fq_sheep.Baa(1, "ERR: proactive reserve failed: uri=%s h1=%s h2=%s exp=%d qnum=%d swid=%s port=%d [TGUFQM008]", uri_prefix, fdata.Match.Ip1, fdata.Match.Ip2, fdata.Expiry, fdata.Espq.Queuenum, fdata.Espq.Switch, fdata.Espq.Port) } } else { // q-lite now generates one flowmod in each direction because of the ITONS requirements if send_all || fdata.Espq.Queuenum > 1 { // if sending all fmods, or this has a non-intermediate queue cdata := fdata.Clone() // copy so we can alter w/o affecting sender's copy if cdata.Espq.Port == -128 { // we'll assume in this case that the switch given is the host name and we need to set the switch to br-int swid := "br-int" cdata.Swid = &swid } if cdata.Resub == nil { resub_list := "" // resub to alternate table to set a meta mark, then to table 0 to hit openstack junk if cdata.Single_switch || fdata.Dir_in { // must use the base table for inbound traffic OR same switch traffic (bug 2015/1/26) resub_list = fmt.Sprintf("%d 0", alt_table) // base alt_table is for 'local' traffic (trafic that doesn't go through br-rl } else { resub_list = fmt.Sprintf("%d 0", alt_table+1) // base+1 is for OUTBOUND only traffic that must go through the rate limiting bridge } cdata.Resub = &resub_list } meta := "0x00/0x07" // match-value/mask; match only when meta neither of our two bits, nor the agent bit (0x04) are set cdata.Match.Meta = &meta if fdata.Dir_in { // inbound to this switch we need to revert dscp from our settings to the 'origianal' settings if cdata.Single_switch { cdata.Match.Dscp = -1 // there is no match if both on same switch send_gfmod_agent(cdata, ip2mac, host_list, phost_suffix) } else { cdata.Match.Dscp = cdata.Dscp // match the dscp that was added on ingress if !cdata.Dscp_koe { // dropping the value on exit cdata.Action.Dscp = 0 // set action to turn it off, otherwise we let it ride (no overt action) } send_gfmod_agent(cdata, ip2mac, host_list, phost_suffix) } } else { // outbound from this switch set the dscp value specified on the reservation cdata.Match.Dscp = -1 // on outbound there is no dscp match, ensure this is off if cdata.Single_switch { send_gfmod_agent(cdata, ip2mac, host_list, phost_suffix) // in single switch mode there is no dscp value needed } else { cdata.Action.Dscp = cdata.Dscp // otherwise set the value and send send_gfmod_agent(cdata, ip2mac, host_list, phost_suffix) } } } msg.Response_ch = nil } case REQ_ST_RESERVE: // reservation fmods for traffic steering msg.Response_ch = nil // for now, nothing goes back if msg.Req_data != nil { fq_data := msg.Req_data.(*Fq_req) // request data if uri_prefix != "" { // an sdn controller -- skoogi -- is enabled (not supported) fq_sheep.Baa(0, "ERR: steering reservations are not supported with skoogi (SDNC); no flow-mods pushed") } else { send_stfmod_agent(fq_data, ip2mac, host_list) } } else { fq_sheep.Baa(0, "CRI: missing data on st-reserve request to fq-mgr") } case REQ_SK_RESERVE: // send a reservation to skoogi data = msg.Req_data.([]interface{}) // msg data expected to be array of interface: h1, h2, expiry, queue h1/2 must be IP addresses if uri_prefix != "" { fq_sheep.Baa(2, "msg to reserve: %s %s %s %d %d", uri_prefix, data[0].(string), data[1].(string), data[2].(int64), data[3].(int)) msg.State = gizmos.SK_reserve(&uri_prefix, data[0].(string), data[1].(string), data[2].(int64), data[3].(int)) } else { fq_sheep.Baa(1, "reservation not sent, no sdn-host defined: %s %s %s %d %d", uri_prefix, data[0].(string), data[1].(string), data[2].(int64), data[3].(int)) } case REQ_SETQUEUES: // request from 
			qlist := msg.Req_data.([]interface{})[0].([]string)
			if ssq_cmd != nil {
				adjust_queues(qlist, ssq_cmd, host_list)             // if writing to a file and driving a local script
			} else {
				adjust_queues_agent(qlist, host_list, phost_suffix)  // if sending json to an agent
			}

		case REQ_CHOSTLIST:     // this is tricky as it comes from the tickler as a request, and from osifmgr as a response; be careful!
			msg.Response_ch = nil                 // regardless of source, we should not reply to this request

			if msg.State != nil || msg.Response_data != nil {        // response from ostack if with list or error
				if msg.Response_data.(*string) != nil {
					hls := strings.TrimLeft(*(msg.Response_data.(*string)), " \t")   // ditch leading whitespace
					hl := &hls
					if *hl != "" {
						host_list = hl                               // ok to use it
						if phost_suffix != nil {
							fq_sheep.Baa(2, "host list from osif before suffix added: %s", *host_list)
							host_list = add_phost_suffix(host_list, phost_suffix)    // in some cases ostack sends foo, but we really need to use foo-suffix (sigh)
						}
						send_hlist_agent(host_list)                  // send to agent_manager
						fq_sheep.Baa(2, "host list received from osif: %s", *host_list)
					} else {
						fq_sheep.Baa(1, "host list received from osif was discarded: ()")
					}
				} else {
					fq_sheep.Baa(0, "WRN: no data from openstack; expected host list string  [TGUFQM009]")
				}
			} else {
				req_hosts(my_chan, fq_sheep)                         // send requests to osif for data
			}

		case REQ_IP2MACMAP:     // a new map from osif
			if msg.Req_data != nil {
				newmap := msg.Req_data.(map[string]*string)
				if len(newmap) > 0 {
					ip2mac = newmap                                  // safe to replace
					fq_sheep.Baa(2, "ip2mac translation received from osif: %d elements", len(ip2mac))
				} else {
					if ip2mac != nil {
						fq_sheep.Baa(2, "ip2mac translation received from osif: 0 elements -- kept old table with %d elements", len(ip2mac))
					} else {
						fq_sheep.Baa(2, "ip2mac translation received from osif: 0 elements -- no existing table to keep")
					}
				}
			} else {
				fq_sheep.Baa(0, "WRN: no data from osif (nil map); expected ip2mac translation map  [TGUFQM010]")
			}
			msg.State = nil     // state is always good

		default:
			fq_sheep.Baa(1, "unknown request: %d", msg.Msg_type)
			msg.Response_data = nil
			if msg.Response_ch != nil {
				msg.State = fmt.Errorf("unknown request (%d)", msg.Msg_type)
			}
		}

		fq_sheep.Baa(3, "processing message complete: %d", msg.Msg_type)
		if msg.Response_ch != nil {               // if a response channel was provided
			fq_sheep.Baa(3, "sending response: %d", msg.Msg_type)
			msg.Response_ch <- msg                // send our result back to the requestor
		}
	}
}
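
/*
	Illustrative sketch (assumptions noted): the shape of a REQ_SETQUEUES request as fq_mgr
	unpacks it in the case above. The sender (res_mgr) wraps the queue list in a generic slice
	and places it at FQ_QLIST (index 0), which is why the case asserts
	msg.Req_data.([]interface{})[0].([]string). The queue strings shown here are hypothetical
	placeholders, not the real format produced by the network manager.

		qlist := []string{"switch1/port1,res1,1,20000000,20000000,200"}   // hypothetical queue entries
		fq_data := make([]interface{}, 1)
		fq_data[FQ_QLIST] = qlist                                // queue list for fq_mgr to act on
		req := ipc.Mk_chmsg()
		req.Send_req(fq_ch, nil, REQ_SETQUEUES, fq_data, nil)    // no response channel; fq_mgr does not reply
*/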