Exemplo n.º 1
0
func TestChkpt(t *testing.T) {
	c := chkpt.Mk_chkpt("/tmp/foo", 3, 5)
	c.Add_md5()

	if c == nil {
		t.Errorf("creation of chkpt obj failed")
		return
	}

	err := c.Create()

	if err != nil {
		t.Errorf("open file for write failed: %s", err)
		return
	}

	fmt.Fprintf(c, "this is a test! %d\n", time.Now().Unix()) // Chkpt can be used directly in a format statement if open
	fname, err := c.Close()

	if err != nil {
		fmt.Printf("error reported by close: %s\n", err)
		t.Fail()
	}
	fmt.Printf("output chkpt to: %s\n", fname)
}
Exemplo n.º 2
0
Arquivo: res_mgr.go Projeto: att/tegu
/*
	Executes as a goroutine to drive the reservation manager portion of tegu.
*/
func Res_manager( my_chan chan *ipc.Chmsg, cookie *string ) {

	var (
		inv	*Inventory
		msg	*ipc.Chmsg
		ckptd	string
		last_qcheck	int64 = 0			// time that the last queue check was made to set window
		last_chkpt	int64 = 0			// time that the last checkpoint was written
		retry_chkpt bool = false		// checkpoint needs to be retried because of a timing issue
		queue_gen_type = REQ_GEN_EPQMAP
		alt_table = DEF_ALT_TABLE		// table number where meta marking happens
		all_sys_up	bool = false;		// set when we receive the all_up message; some functions (chkpt) must wait for this
		hto_limit 	int = 3600 * 18		// OVS has a size limit to the hard timeout value, this caps it just under the OVS limit
		res_refresh	int64 = 0			// next time when we must force all reservations to refresh flow-mods (hto_limit nonzero)
		rr_rate		int = 3600			// refresh rate (1 hour)
		favour_v6 bool = true			// favour ipv6 addresses if a host has both defined.
	)

	super_cookie = cookie				// global for all methods

	rm_sheep = bleater.Mk_bleater( 0, os.Stderr )		// allocate our bleater and attach it to the master
	rm_sheep.Set_prefix( "res_mgr" )
	tegu_sheep.Add_child( rm_sheep )					// we become a child so that if the master vol is adjusted we'll react too

	p := cfg_data["default"]["queue_type"]				// lives in default b/c used by fq-mgr too
	if p != nil {
		if *p == "endpoint" {
			queue_gen_type = REQ_GEN_EPQMAP
		} else {
			queue_gen_type = REQ_GEN_QMAP
		}
	}

	p = cfg_data["default"]["alttable"]				// alt table for meta marking
	if p != nil {
		alt_table = clike.Atoi( *p )
	}

	p = cfg_data["default"]["favour_ipv6"]
	if p != nil {
		favour_v6 = *p == "true"
	}

	if cfg_data["resmgr"] != nil {
		cdp := cfg_data["resmgr"]["chkpt_dir"]
		if cdp == nil {
			ckptd = "/var/lib/tegu/resmgr"							// default directory and prefix
		} else {
			ckptd = *cdp + "/resmgr"							// add prefix to directory in config
		}

		p = cfg_data["resmgr"]["verbose"]
		if p != nil {
			rm_sheep.Set_level(  uint( clike.Atoi( *p ) ) )
		}

		/*
		p = cfg_data["resmgr"]["set_vlan"]
		if p != nil {
			set_vlan = *p == "true"
		}
		*/

		p = cfg_data["resmgr"]["super_cookie"]
		if p != nil {
			super_cookie = p
			rm_sheep.Baa( 1, "super-cookie was set from config file" )
		}

		p = cfg_data["resmgr"]["hto_limit"]					// if OVS or whatever has a max timeout we can ensure it's not surpassed
		if p != nil {
			hto_limit = clike.Atoi( *p )
		}

		p = cfg_data["resmgr"]["res_refresh"]				// rate that reservations are refreshed if hto_limit is non-zero
		if p != nil {
			rr_rate = clike.Atoi( *p )
			if rr_rate < 900 {
				if rr_rate < 120 {
					rm_sheep.Baa( 0, "NOTICE: reservation refresh rate in config is insanely low (%ds) and was changed to 1800s", rr_rate )
					rr_rate = 1800
				} else {
					rm_sheep.Baa( 0, "NOTICE: reservation refresh rate in config is too low: %ds", rr_rate )
				}
			}
		}
	}

	send_meta_counter := 200;										// send meta f-mods only now and again
	rm_sheep.Baa( 1, "ovs table number %d used for metadata marking", alt_table )

	res_refresh = time.Now().Unix() + int64( rr_rate )				// set first refresh in an hour (ignored if hto_limit not set
	inv = Mk_inventory( )
	inv.chkpt = chkpt.Mk_chkpt( ckptd, 10, 90 )

	last_qcheck = time.Now().Unix()

	tkl_ch := make( chan *ipc.Chmsg, 5 )								// special, short buffer, channel for tickles allows 5 to queue before blocking sender
	tklr.Add_spot( 2, tkl_ch, REQ_PUSH, nil, ipc.FOREVER )				// push reservations to agent just before they go live
	tklr.Add_spot( 1, tkl_ch, REQ_SETQUEUES, nil, ipc.FOREVER )			// drives us to see if queues need to be adjusted
	tklr.Add_spot( 5, tkl_ch, REQ_RTRY_CHKPT, nil, ipc.FOREVER )		// ensures that we retried any missed checkpoints
	tklr.Add_spot( 60, tkl_ch, REQ_VET_RETRY, nil, ipc.FOREVER )		// run the retry queue if it has size

	go rm_lookup( rmgrlu_ch, inv )

	rm_sheep.Baa( 3, "res_mgr is running  %x", my_chan )
	for {
		select {									// select next ready message on either channel
			case msg = <- tkl_ch:					// msg available on tickle channel
				msg.State = nil						// nil state is OK, no error
				my_chan <- msg;						// just pass it through; tkl_ch has a small buffer (blocks quickly) and this prevents filling the main queue w/ tickles if we get busy	

			case msg = <- my_chan:					// process message from the main channel
				rm_sheep.Baa( 3, "processing message: %d", msg.Msg_type )
				switch msg.Msg_type {
					case REQ_NOOP:			// just ignore

					case REQ_ADD:
						msg.State = inv.Add_res( msg.Req_data )			// add will determine the pledge type and do the right thing
						msg.Response_data = nil


					case REQ_ALLUP:			// signals that all initialisation is complete (chkpting etc. can go)
						all_sys_up = true
						// periodic checkpointing turned off with the introduction of tegu_ha
						//tklr.Add_spot( 180, my_chan, REQ_CHKPT, nil, ipc.FOREVER )		// tickle spot to drive us every 180 seconds to checkpoint

					case REQ_RTRY_CHKPT:									// called to attempt to send a queued checkpoint request
						if all_sys_up {
							if retry_chkpt {
								rm_sheep.Baa( 3, "invoking checkpoint (retry)" )
								retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )
							}
						}

					case REQ_CHKPT:											// external thread has requested checkpoint
						if all_sys_up {
							rm_sheep.Baa( 3, "invoking checkpoint" )
							retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )
						}

					case REQ_DEL:											// user initiated delete -- requires cookie
						data := msg.Req_data.( []*string )					// assume pointers to name and cookie
						if data[0] != nil  &&  *data[0] == "all" {
							inv.Del_all_res( data[1] )
							msg.State = nil
						} else {
							msg.State = inv.Del_res( data[0], data[1] )
						}

						inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )			// must force a push to push augmented (shortened) reservations
						msg.Response_data = nil

					case REQ_DUPCHECK:
						if msg.Req_data != nil {
							msg.Response_data, msg.State = inv.dup_check(  msg.Req_data.( *gizmos.Pledge ) )
						}

					case REQ_GET:											// user initiated get -- requires cookie
						data := msg.Req_data.( []*string )					// assume pointers to name and cookie
						msg.Response_data, msg.State = inv.Get_res( data[0], data[1] )

					case REQ_LIST:											// list reservations	(for a client)
						msg.Response_data, msg.State = inv.res2json( )

					case REQ_LOAD:								// load from a checkpoint file
						data := msg.Req_data.( *string )		// assume pointers to name and cookie
						msg.State = inv.load_chkpt( data )
						msg.Response_data = nil
						rm_sheep.Baa( 1, "checkpoint file loaded" )

					case REQ_PAUSE:
						msg.State = nil							// right now this cannot fail in ways we know about
						msg.Response_data = ""
						inv.pause_on()
						res_refresh = 0;						// must force a push of everything on next push tickle
						rm_sheep.Baa( 1, "pausing..." )

					case REQ_RESUME:
						msg.State = nil							// right now this cannot fail in ways we know about
						msg.Response_data = ""
						res_refresh = 0;						// must force a push of everything on next push tickle
						inv.pause_off()

					case REQ_SETQUEUES:							// driven about every second to reset the queues if a reservation state has changed
						now := time.Now().Unix()
						if now > last_qcheck  &&  inv.any_concluded( now - last_qcheck ) || inv.any_commencing( now - last_qcheck, 0 ) {
							rm_sheep.Baa( 1, "channel states: rm=%d rmlu=%d fq=%d net=%d agent=%d", len( rmgr_ch ), len( rmgrlu_ch ), len( fq_ch ), len( nw_ch ), len( am_ch ) )
							rm_sheep.Baa( 1, "reservation state change detected, requesting queue map from net-mgr" )
							tmsg := ipc.Mk_chmsg( )
							tmsg.Send_req( nw_ch, my_chan, queue_gen_type, time.Now().Unix(), nil )		// get a queue map; when it arrives we'll push to fqmgr and trigger flow-mod push
						}
						last_qcheck = now

					case REQ_PUSH:								// driven every few seconds to check for need to refresh because of switch max timeout setting
						if hto_limit > 0 {						// if reservation flow-mods are capped with a hard timeout limit
							now := time.Now().Unix()
							if now > res_refresh {
								rm_sheep.Baa( 2, "refreshing all reservations" )
								inv.reset_push()							// reset pushed flag on all reservations to cause active ones to be pushed again
								res_refresh = now + int64( rr_rate )		// push everything again in an hour

								inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )			// force a push of all
							}
						}


					case REQ_PLEDGE_LIST:						// generate a list of pledges that are related to the given VM
						msg.Response_data, msg.State = inv.pledge_list(  msg.Req_data.( *string ) )

					case REQ_SETULCAP:							// user link capacity; expect array of two string pointers (name and value)
						data := msg.Req_data.( []*string )
						inv.add_ulcap( data[0], data[1] )
						retry_chkpt, last_chkpt = inv.write_chkpt( last_chkpt )

					// CAUTION: the requests below come back as asynch responses rather than as initial message
					case REQ_IE_RESERVE:						// an IE reservation failed
						msg.Response_ch = nil					// immediately disable to prevent loop
						inv.failed_push( msg )					// suss out the pledge and mark it unpushed

					case REQ_GEN_QMAP:							// response caries the queue map that now should be sent to fq-mgr to drive a queue update
						fallthrough

					case REQ_GEN_EPQMAP:
						rm_sheep.Baa( 1, "received queue map from network manager" )

						qlist := msg.Response_data.( []string )							// get the qulist map for our use first
						if send_meta_counter >= 200 {
							send_meta_fmods( qlist, alt_table )								// push meta rules
							send_meta_counter = 0
						} else {
							send_meta_counter++
						}

						msg.Response_ch = nil											// immediately disable to prevent loop
						fq_data := make( []interface{}, 1 )
						fq_data[FQ_QLIST] = msg.Response_data
						tmsg := ipc.Mk_chmsg( )
						tmsg.Send_req( fq_ch, nil, REQ_SETQUEUES, fq_data, nil )		// send the queue list to fq manager to deal with

						inv.push_reservations( my_chan, alt_table, int64( hto_limit ), favour_v6 )			// now safe to push reservations if any activated

					case REQ_VET_RETRY:
						if inv != nil && len( inv.retry ) > 0 {
							inv.vet_retries( )
						}

					case REQ_YANK_RES:										// yank a reservation from the inventory returning the pledge and allowing flow-mods to purge
						if msg.Response_ch != nil {
							msg.Response_data, msg.State = inv.yank_res( msg.Req_data.( *string ) )
						}

					/* deprecated -- moved to rm_lookup
					case REQ_GET_MIRRORS:									// user initiated get list of mirrors
						t := inv.Get_mirrorlist()
						msg.Response_data = &t;
					*/

					default:
						rm_sheep.Baa( 0, "WRN: res_mgr: unknown message: %d [TGURMG001]", msg.Msg_type )
						msg.Response_data = nil
						msg.State = fmt.Errorf( "res_mgr: unknown message (%d)", msg.Msg_type )
						msg.Response_ch = nil				// we don't respond to these.
				}	// end main channel case

		}		// end select

		rm_sheep.Baa( 3, "processing message complete: %d", msg.Msg_type )
		if msg.Response_ch != nil {			// if a response channel was provided
			msg.Response_ch <- msg			// send our result back to the requester
		}
	}
}