Example #1
0
// Cleanup kills all eventually remaining processes from the last Deploy-run:
// first the local ssh proxy-forwarding to the logserver, then - via ssh -
// the users- and sshd-processes on the deterlab server itself.
// It returns nil both on success and on timeout, as a failed cleanup is
// not fatal for the next run.
func (d *Deterlab) Cleanup() error {
	// Cleanup eventual ssh from the proxy-forwarding to the logserver.
	err := exec.Command("pkill", "-9", "-f", "ssh -nNTf").Run()
	if err != nil {
		dbg.Lvl3("Error stopping ssh:", err)
	}

	// SSH to the deterlab-server and end all running users-processes.
	dbg.Lvl3("Going to kill everything")
	sshKill := make(chan string)
	go func() {
		// Cleanup eventual residues of previous round - users and sshd.
		cliutils.SshRun(d.Login, d.Host, "killall -9 users sshd")
		// Shadow err locally: the outer err must not be written from
		// this goroutine (data race).
		err := cliutils.SshRunStdout(d.Login, d.Host, "test -f remote/users && ( cd remote; ./users -kill )")
		if err != nil {
			dbg.Lvl1("NOT-Normal error from cleanup")
			sshKill <- "error"
		}
		sshKill <- "stopped"
	}()

	// Wait for the remote kill to report back, giving up after 20 seconds.
	for {
		select {
		case msg := <-sshKill:
			if msg == "stopped" {
				dbg.Lvl3("Users stopped")
				return nil
			}
			dbg.Lvl2("Received other command", msg, "probably the app didn't quit correctly")
		case <-time.After(time.Second * 20):
			dbg.Lvl3("Timeout error when waiting for end of ssh")
			return nil
		}
	}
}
Example #2
0
// main cleans up all physical machines of the experiment, then - unless only
// a cleanup was requested via the kill-flag - starts the logging servers,
// one forkexec per physical machine, and (for the coll_stamp app) one time
// client per machine, waiting for the servers to finish.
func main() {
	deter, err := deploy.ReadConfig("remote")
	if err != nil {
		log.Fatal("Couldn't read config in deter:", err)
	}
	conf = deter.Config
	dbg.DebugVisible = conf.Debug

	dbg.Lvl1("running deter with nmsgs:", conf.Nmsgs, "rate:", conf.Rate, "rounds:", conf.Rounds, "debug:", conf.Debug)

	// Map each virtual host name to the physical machine it runs on.
	virt, err := cliutils.ReadLines("remote/virt.txt")
	if err != nil {
		log.Fatal(err)
	}
	phys, err := cliutils.ReadLines("remote/phys.txt")
	if err != nil {
		log.Fatal(err)
	}
	vpmap := make(map[string]string)
	for i := range virt {
		vpmap[virt[i]] = phys[i]
	}

	// Kill old processes on every physical machine in parallel.
	var wg sync.WaitGroup
	doneHosts := make([]bool, len(phys))
	for i, h := range phys {
		wg.Add(1)
		go func(i int, h string) {
			defer wg.Done()
			dbg.Lvl4("Cleaning up host", h)
			cliutils.SshRun("", h, "sudo killall app forkexec logserver timeclient scp ssh 2>/dev/null >/dev/null")
			time.Sleep(1 * time.Second)
			// Second pass for the app, in case it was just restarting.
			cliutils.SshRun("", h, "sudo killall app 2>/dev/null >/dev/null")
			if dbg.DebugVisible > 3 {
				dbg.Lvl4("Killing report:")
				cliutils.SshRunStdout("", h, "ps ax")
			}
			doneHosts[i] = true
			dbg.Lvl3("Host", h, "cleaned up")
		}(i, h)
	}

	// Wait for all cleanups, but give up after 10 seconds and report the
	// hosts that didn't answer.
	cleanupChannel := make(chan string)
	go func() {
		wg.Wait()
		dbg.Lvl3("Done waiting")
		cleanupChannel <- "done"
	}()
	select {
	case msg := <-cleanupChannel:
		dbg.Lvl3("Received msg from cleanupChannel", msg)
	case <-time.After(time.Second * 10):
		for i, m := range doneHosts {
			if !m {
				dbg.Lvl1("Missing host:", phys[i])
			}
		}
		dbg.Fatal("Didn't receive all replies.")
	}

	if kill {
		dbg.Lvl1("Returning only from cleanup")
		return
	}

	/*
		 * Why copy the stuff to the other nodes? We have NFS, no?
		for _, h := range phys {
			wg.Add(1)
			go func(h string) {
				defer wg.Done()
				cliutils.Rsync("", h, "remote", "")
			}(h)
		}
		wg.Wait()
	*/

	// The first nloggers physical machines are reserved as logging servers;
	// the very first of them is the master logger all others forward to.
	nloggers := conf.Nloggers
	masterLogger := phys[0]
	loggers := []string{masterLogger}
	dbg.Lvl3("Going to create", nloggers, "loggers")
	for n := 1; n < nloggers; n++ {
		loggers = append(loggers, phys[n])
	}

	// The remaining machines run the actual servers.
	phys = phys[nloggers:]
	virt = virt[nloggers:]

	// Read in and parse the configuration file.
	file, err := ioutil.ReadFile("remote/tree.json")
	if err != nil {
		// log.Fatalf, not log.Fatal: the message is a printf-style format
		// string and log.Fatal would print the "%v" verb literally.
		log.Fatalf("deter.go: error reading configuration file: %v", err)
	}
	dbg.Lvl4("cfg file:", string(file))
	var cf config.ConfigFile
	err = json.Unmarshal(file, &cf)
	if err != nil {
		log.Fatal("unable to unmarshal config.ConfigFile:", err)
	}

	hostnames := cf.Hosts
	dbg.Lvl4("hostnames:", hostnames)

	depth := graphs.Depth(cf.Tree)
	// Pick the first leaf found while traversing the tree.
	var random_leaf string
	cf.Tree.TraverseTree(func(t *graphs.Tree) {
		if random_leaf != "" {
			return
		}
		if len(t.Children) == 0 {
			random_leaf = t.Name
		}
	})

	rootname = hostnames[0]

	dbg.Lvl4("depth of tree:", depth)

	// mapping from physical node name to the timestamp servers that are running there
	// essentially a reverse mapping of vpmap except ports are also used
	physToServer := make(map[string][]string)
	for _, virt := range hostnames {
		v, _, _ := net.SplitHostPort(virt)
		p := vpmap[v]
		ss := physToServer[p]
		ss = append(ss, virt)
		physToServer[p] = ss
	}

	// Start up the logging server on each logger host at port 10000.
	dbg.Lvl1("starting up logservers: ", loggers)
	loggerports := make([]string, len(loggers))
	for i, logger := range loggers {
		loggerport := logger + ":10000"
		loggerports[i] = loggerport
		// Every logger redirects to the master logger, except the master
		// itself, which gets an empty master address.
		master := masterLogger + ":10000"
		if loggerport == masterLogger+":10000" {
			master = ""
		}

		// Copy configuration file to make higher file-limits.
		err = cliutils.SshRunStdout("", logger, "sudo cp remote/cothority.conf /etc/security/limits.d")
		if err != nil {
			log.Fatal("Couldn't copy limit-file:", err)
		}

		go cliutils.SshRunStdout("", logger, "cd remote; sudo ./logserver -addr="+loggerport+
			" -master="+master)
	}

	// Round-robin index into loggerports for the servers and clients below.
	i := 0
	// For coll_stamp we have to wait for everything in place which takes quite some time.
	// We set up a directory and every host writes a file once he's ready to listen.
	// When everybody is ready, the directory is deleted and the test starts.
	coll_stamp_dir := "remote/coll_stamp_up"
	if conf.App == "coll_stamp" || conf.App == "coll_sign" {
		os.RemoveAll(coll_stamp_dir)
		os.MkdirAll(coll_stamp_dir, 0777)
		time.Sleep(time.Second)
	}
	dbg.Lvl1("starting", len(physToServer), "forkexecs")
	totalServers := 0
	for phys, virts := range physToServer {
		if len(virts) == 0 {
			continue
		}
		totalServers += len(virts)
		dbg.Lvl1("Launching forkexec for", len(virts), "clients on", phys)
		//cmd := GenExecCmd(phys, virts, loggerports[i], random_leaf)
		i = (i + 1) % len(loggerports)
		wg.Add(1)
		// Pass the logger port by value: the goroutine must not read the
		// shared loop counter i, which keeps changing while it runs.
		go func(phys, logger string) {
			defer wg.Done()
			dbg.Lvl4("Starting servers on physical machine ", phys)
			err := cliutils.SshRunStdout("", phys, "cd remote; sudo ./forkexec"+
				" -physaddr="+phys+" -logger="+logger)
			if err != nil {
				log.Fatal("Error starting timestamper:", err, phys)
			}
			dbg.Lvl4("Finished with Timestamper", phys)
		}(phys, loggerports[i])
	}

	if conf.App == "coll_stamp" || conf.App == "coll_sign" {
		// Every stampserver that started up (mostly waiting for configuration-reading)
		// writes its name in coll_stamp_dir - once everybody is there, the directory
		// is cleaned to flag it's OK to go on.
		start_config := time.Now()
		for {
			files, err := ioutil.ReadDir(coll_stamp_dir)
			if err != nil {
				log.Fatal("Couldn't read directory", coll_stamp_dir, err)
			} else {
				dbg.Lvl1("Stampservers started:", len(files), "/", totalServers, "after", time.Since(start_config))
				if len(files) == totalServers {
					os.RemoveAll(coll_stamp_dir)
					// 1st second for everybody to see the deleted directory
					// 2nd second for everybody to start up listening
					time.Sleep(2 * time.Second)
					break
				}
			}
			time.Sleep(time.Second)
		}
	}

	switch conf.App {
	case "coll_stamp":
		dbg.Lvl1("starting", len(physToServer), "time clients")
		// Start up one timeclient per physical machine;
		// it requests timestamps from all the servers on that machine.
		for p, ss := range physToServer {
			if len(ss) == 0 {
				continue
			}
			servers := strings.Join(ss, ",")
			// i and p are passed by value so the goroutine doesn't race
			// with the loop advancing them.
			go func(i int, p string) {
				_, err := cliutils.SshRun("", p, "cd remote; sudo ./app -mode=client -app="+conf.App+
					" -name=client@"+p+
					" -server="+servers+
					" -logger="+loggerports[i])
				if err != nil {
					dbg.Lvl4("Deter.go : timeclient error ", err)
				}
				dbg.Lvl4("Deter.go : Finished with timeclient", p)
			}(i, p)
			i = (i + 1) % len(loggerports)
		}
	case "coll_sign_no":
		// TODO: for now it's only a simple startup from the server
		dbg.Lvl1("Starting only one client")
		/*
			p := physToServer[0][0]
			servers := strings.Join(physToServer[0][1], ",")
			_, err = cliutils.SshRun("", p, "cd remote; sudo ./app -mode=client -app=" + conf.App +
			" -name=client@" + p +
			" -server=" + servers +
			" -logger=" + loggerports[i])
			i = (i + 1) % len(loggerports)
		*/
	}

	// Wait for the servers to finish before stopping.
	wg.Wait()
	//time.Sleep(10 * time.Minute)
}
Example #3
0
// main cleans up (with the kill-flag) or prepares all physical hosts of a
// swapped-in Deterlab experiment, launches the monitoring proxy and one
// forkexec per physical machine, then - depending on the configured app -
// the clients, and waits for the servers to finish.
func main() {
	deterlab.ReadConfig()
	flag.Parse()

	// Map each virtual host name to the physical machine it runs on.
	vpmap := make(map[string]string)
	for i := range deterlab.Virt {
		vpmap[deterlab.Virt[i]] = deterlab.Phys[i]
	}
	// kill old processes
	var wg sync.WaitGroup
	// Collapse runs of spaces so node_list's output can be split on " ".
	re := regexp.MustCompile(" +")
	hosts, err := exec.Command("/usr/testbed/bin/node_list", "-e", deterlab.Project+","+deterlab.Experiment).Output()
	if err != nil {
		dbg.Fatal("Deterlab experiment", deterlab.Project+"/"+deterlab.Experiment, "seems not to be swapped in. Aborting.")
		// NOTE(review): if dbg.Fatal already exits (as the name suggests),
		// this os.Exit is unreachable - confirm against the dbg package.
		os.Exit(-1)
	}
	hosts_trimmed := strings.TrimSpace(re.ReplaceAllString(string(hosts), " "))
	hostlist := strings.Split(hosts_trimmed, " ")
	doneHosts := make([]bool, len(hostlist))
	dbg.Lvl2("Found the following hosts:", hostlist)
	if kill {
		dbg.Lvl1("Cleaning up", len(hostlist), "hosts.")
	}
	// One goroutine per host: either kill everything (kill-mode) or raise
	// the file-limits in preparation of the run.
	for i, h := range hostlist {
		wg.Add(1)
		go func(i int, h string) {
			defer wg.Done()
			if kill {
				dbg.Lvl4("Cleaning up host", h, ".")
				cliutils.SshRun("", h, "sudo killall -9 "+deterlab.App+" logserver forkexec timeclient scp 2>/dev/null >/dev/null")
				time.Sleep(1 * time.Second)
				// Second pass for the app, in case it was just restarting.
				cliutils.SshRun("", h, "sudo killall -9 "+deterlab.App+" 2>/dev/null >/dev/null")
				time.Sleep(1 * time.Second)
				// Also kill all other process that start with "./" and are probably
				// locally started processes
				cliutils.SshRun("", h, "sudo pkill -9 -f '\\./'")
				time.Sleep(1 * time.Second)
				if dbg.DebugVisible > 3 {
					dbg.Lvl4("Cleaning report:")
					cliutils.SshRunStdout("", h, "ps aux")
				}
			} else {
				dbg.Lvl3("Setting the file-limit higher on", h)

				// Copy configuration file to make higher file-limits
				err := cliutils.SshRunStdout("", h, "sudo cp remote/cothority.conf /etc/security/limits.d")
				if err != nil {
					dbg.Fatal("Couldn't copy limit-file:", err)
				}
			}
			doneHosts[i] = true
			dbg.Lvl3("Host", h, "cleaned up")
		}(i, h)
	}

	// Wait for all hosts, but give up after 20 seconds and report the
	// machines that didn't answer (they probably need a node_reboot).
	cleanupChannel := make(chan string)
	go func() {
		wg.Wait()
		dbg.Lvl3("Done waiting")
		cleanupChannel <- "done"
	}()
	select {
	case msg := <-cleanupChannel:
		dbg.Lvl3("Received msg from cleanupChannel", msg)
	case <-time.After(time.Second * 20):
		for i, m := range doneHosts {
			if !m {
				dbg.Lvl1("Missing host:", hostlist[i], "- You should run")
				dbg.Lvl1("/usr/testbed/bin/node_reboot", hostlist[i])
			}
		}
		dbg.Fatal("Didn't receive all replies while cleaning up - aborting.")
	}

	if kill {
		dbg.Lvl2("Only cleaning up - returning")
		return
	}

	// ADDITIONS : the monitoring part
	// Proxy will listen on Sink:SinkPort and redirect every packet to
	// RedirectionAddress:RedirectionPort. With remote tunnel forwarding it will
	// be forwarded to the real sink
	dbg.Lvl2("Launching proxy redirecting to", deterlab.ProxyRedirectionAddress, ":", deterlab.ProxyRedirectionPort)
	go monitor.Proxy(deterlab.ProxyRedirectionAddress + ":" + deterlab.ProxyRedirectionPort)
	// Give the proxy a moment to come up before anything connects to it.
	time.Sleep(time.Second)

	hostnames := deterlab.Hostnames
	dbg.Lvl4("hostnames:", hostnames)

	// mapping from physical node name to the timestamp servers that are running there
	// essentially a reverse mapping of vpmap except ports are also used
	physToServer := make(map[string][]string)
	for _, virt := range hostnames {
		v, _, _ := net.SplitHostPort(virt)
		p := vpmap[v]
		ss := physToServer[p]
		ss = append(ss, virt)
		physToServer[p] = ss
	}

	monitorAddr := deterlab.MonitorAddress + ":" + monitor.SinkPort
	servers := len(physToServer)
	ppm := len(deterlab.Hostnames) / servers
	dbg.Lvl1("starting", servers, "forkexecs with", ppm, "processes each =", servers*ppm)
	totalServers := 0
	// Launch one forkexec per physical machine; it spawns the per-virtual-host
	// processes itself.
	for phys, virts := range physToServer {
		if len(virts) == 0 {
			continue
		}
		totalServers += len(virts)
		dbg.Lvl2("Launching forkexec for", len(virts), "clients on", phys)
		wg.Add(1)
		go func(phys string) {
			//dbg.Lvl4("running on", phys, cmd)
			defer wg.Done()
			dbg.Lvl4("Starting servers on physical machine", phys, "with logger =", deterlab.MonitorAddress+":"+monitor.SinkPort)
			err := cliutils.SshRunStdout("", phys, "cd remote; sudo ./forkexec"+
				" -physaddr="+phys+" -logger="+deterlab.MonitorAddress+":"+monitor.SinkPort)
			if err != nil {
				dbg.Lvl1("Error starting timestamper:", err, phys)
			}
			dbg.Lvl4("Finished with Timestamper", phys)
		}(phys)
	}

	if deterlab.App == "stamp" || deterlab.App == "sign" {
		// Every stampserver that started up (mostly waiting for configuration-reading)
		// writes its name in coll_stamp_dir - once everybody is there, the directory
		// is cleaned to flag it's OK to go on.
		// NOTE(review): readiness is polled from the monitor here, not from a
		// directory - the comment above looks outdated; confirm and update.
		start_config := time.Now()
		for {
			s, err := monitor.GetReady(monitorAddr)
			if err != nil {
				log.Fatal("Couldn't contact monitor")
			} else {
				dbg.Lvl1("Processes started:", s.Ready, "/", totalServers, "after", time.Since(start_config))
				if s.Ready == totalServers {
					dbg.Lvl2("Everybody ready, starting")
					// 1st second for everybody to see the deleted directory
					// 2nd second for everybody to start up listening
					time.Sleep(time.Second * 2)
					break
				}
			}
			time.Sleep(time.Second)
		}
	}

	switch deterlab.App {
	case "stamp":
		dbg.Lvl1("starting", len(physToServer), "time clients")
		// start up one timeclient per physical machine
		// it requests timestamps from all the servers on that machine
		// The very first client launched is flagged as root (amroot).
		amroot := true
		for p, ss := range physToServer {
			if len(ss) == 0 {
				dbg.Lvl3("ss is empty - not starting")
				continue
			}
			servers := strings.Join(ss, ",")
			dbg.Lvl3("Starting with ss=", ss)
			// p and amroot are passed by value so the goroutine doesn't
			// race with the loop advancing them.
			go func(p string, a bool) {
				cmdstr := "cd remote; sudo ./" + deterlab.App + " -mode=client " +
					" -name=client@" + p +
					" -server=" + servers +
					" -amroot=" + strconv.FormatBool(a)
				dbg.Lvl3("Users will launch client:", cmdstr)
				err := cliutils.SshRunStdout("", p, cmdstr)
				if err != nil {
					dbg.Lvl4("Deter.go: error for", deterlab.App, err)
				}
				dbg.Lvl4("Deter.go: Finished with", deterlab.App, p)
			}(p, amroot)
			amroot = false
		}
	case "sign_no":
		// TODO: for now it's only a simple startup from the server
		dbg.Lvl1("Starting only one client")
	}

	// wait for the servers to finish before stopping
	wg.Wait()
}