Beispiel #1
0
func (d *Deterlab) Start(args ...string) error {
	// setup port forwarding for viewing log server
	d.started = true
	// Remote tunneling : the sink port is used both for the sink and for the
	// proxy => the proxy redirects packets to the same port the sink is
	// listening.
	// -n = stdout == /Dev/null, -N => no command stream, -T => no tty
	cmd := []string{"-nNTf", "-o", "StrictHostKeyChecking=no", "-o", "ExitOnForwardFailure=yes", "-R", d.ProxyRedirectionPort + ":" + d.ProxyRedirectionAddress + ":" + monitor.SinkPort, fmt.Sprintf("%s@%s", d.Login, d.Host)}
	exCmd := exec.Command("ssh", cmd...)
	if err := exCmd.Start(); err != nil {
		dbg.Fatal("Failed to start the ssh port forwarding:", err)
	}
	if err := exCmd.Wait(); err != nil {
		dbg.Fatal("ssh port forwarding exited in failure:", err)
	}
	dbg.Lvl3("Setup remote port forwarding", cmd)
	go func() {
		err := cliutils.SshRunStdout(d.Login, d.Host, "cd remote; GOMAXPROCS=8 ./users")
		if err != nil {
			dbg.Lvl3(err)
		}
		d.sshDeter <- "finished"
	}()

	return nil
}
Beispiel #2
0
func (d *Deter) Start() error {
	dbg.Lvl1("Running with", d.Config.Nmachs, "nodes *", d.Config.Hpn, "hosts per node =",
		d.Config.Nmachs*d.Config.Hpn, "and", d.Config.Nloggers, "loggers")

	// setup port forwarding for viewing log server
	dbg.Lvl3("setup port forwarding for master logger: ", d.masterLogger, d.Login, d.Host)
	cmd := exec.Command(
		"ssh",
		"-t",
		"-t",
		fmt.Sprintf("%s@%s", d.Login, d.Host),
		"-L",
		"8081:"+d.masterLogger+":10000")
	err := cmd.Start()
	if err != nil {
		log.Fatal("failed to setup portforwarding for logging server")
	}

	dbg.Lvl3("runnning deter with nmsgs:", d.Config.Nmsgs, d.Login, d.Host)
	// run the deter lab boss nodes process
	// it will be responsible for forwarding the files and running the individual
	// timestamping servers

	go func() {
		dbg.Lvl3(cliutils.SshRunStdout(d.Login, d.Host,
			"GOMAXPROCS=8 remote/deter -nmsgs="+strconv.Itoa(d.Config.Nmsgs)+
				" -hpn="+strconv.Itoa(d.Config.Hpn)+
				" -bf="+strconv.Itoa(d.Config.Bf)+
				" -rate="+strconv.Itoa(d.Config.Rate)+
				" -rounds="+strconv.Itoa(d.Config.Rounds)+
				" -debug="+strconv.Itoa(d.Config.Debug)+
				" -failures="+strconv.Itoa(d.Config.Failures)+
				" -rfail="+strconv.Itoa(d.Config.RFail)+
				" -ffail="+strconv.Itoa(d.Config.FFail)+
				" -app="+d.Config.App+
				" -suite="+d.Config.Suite))
		dbg.Lvl3("Sending stop of ssh")
		d.sshDeter <- "stop"
	}()

	return nil
}
Beispiel #3
0
func (d *Deter) Deploy() error {
	dbg.Lvl1("Assembling all files and configuration options")
	os.RemoveAll(d.DeployDir)
	os.Mkdir(d.DeployDir, 0777)

	dbg.Lvl1("Writing config-files")

	d.generateHostsFile()
	d.readHosts()
	d.calculateGraph()
	d.WriteConfig()

	// copy the webfile-directory of the logserver to the remote directory
	err := exec.Command("cp", "-a", d.DeterDir+"/logserver/webfiles",
		d.DeterDir+"/cothority.conf", d.DeployDir).Run()
	if err != nil {
		log.Fatal("error copying webfiles:", err)
	}
	build, err := ioutil.ReadDir(d.BuildDir)
	for _, file := range build {
		err = exec.Command("cp", d.BuildDir+"/"+file.Name(), d.DeployDir).Run()
		if err != nil {
			log.Fatal("error copying build-file:", err)
		}
	}

	dbg.Lvl1("Copying over to", d.Login, "@", d.Host)
	// Copy everything over to deterlabs
	err = cliutils.Rsync(d.Login, d.Host, d.DeployDir+"/", "remote/")
	if err != nil {
		log.Fatal(err)
	}

	dbg.Lvl1("Done copying")

	dbg.Lvl3(cliutils.SshRunStdout(d.Login, d.Host,
		""))

	return nil
}
Beispiel #4
0
// Kills all eventually remaining processes from the last Deploy-run
func (d *Deterlab) Cleanup() error {
	// Cleanup eventual ssh from the proxy-forwarding to the logserver
	//err := exec.Command("kill", "-9", "$(ps x  | grep ssh | grep nNTf | cut -d' ' -f1)").Run()
	err := exec.Command("pkill", "-9", "-f", "ssh -nNTf").Run()
	if err != nil {
		dbg.Lvl3("Error stopping ssh:", err)
	}

	// SSH to the deterlab-server and end all running users-processes
	dbg.Lvl3("Going to kill everything")
	var sshKill chan string
	sshKill = make(chan string)
	go func() {
		// Cleanup eventual residues of previous round - users and sshd
		cliutils.SshRun(d.Login, d.Host, "killall -9 users sshd")
		err = cliutils.SshRunStdout(d.Login, d.Host, "test -f remote/users && ( cd remote; ./users -kill )")
		if err != nil {
			dbg.Lvl1("NOT-Normal error from cleanup")
			sshKill <- "error"
		}
		sshKill <- "stopped"
	}()

	for {
		select {
		case msg := <-sshKill:
			if msg == "stopped" {
				dbg.Lvl3("Users stopped")
				return nil
			} else {
				dbg.Lvl2("Received other command", msg, "probably the app didn't quit correctly")
			}
		case <-time.After(time.Second * 20):
			dbg.Lvl3("Timeout error when waiting for end of ssh")
			return nil
		}
	}

	return nil
}
Beispiel #5
0
func main() {
	deter, err := deploy.ReadConfig("remote")
	if err != nil {
		log.Fatal("Couldn't read config in deter:", err)
	}
	conf = deter.Config
	dbg.DebugVisible = conf.Debug

	dbg.Lvl1("running deter with nmsgs:", conf.Nmsgs, "rate:", conf.Rate, "rounds:", conf.Rounds, "debug:", conf.Debug)

	virt, err := cliutils.ReadLines("remote/virt.txt")
	if err != nil {
		log.Fatal(err)
	}
	phys, err := cliutils.ReadLines("remote/phys.txt")
	if err != nil {
		log.Fatal(err)
	}
	vpmap := make(map[string]string)
	for i := range virt {
		vpmap[virt[i]] = phys[i]
	}
	// kill old processes
	var wg sync.WaitGroup
	doneHosts := make([]bool, len(phys))
	for i, h := range phys {
		wg.Add(1)
		go func(i int, h string) {
			defer wg.Done()
			dbg.Lvl4("Cleaning up host", h)
			cliutils.SshRun("", h, "sudo killall app forkexec logserver timeclient scp ssh 2>/dev/null >/dev/null")
			time.Sleep(1 * time.Second)
			cliutils.SshRun("", h, "sudo killall app 2>/dev/null >/dev/null")
			if dbg.DebugVisible > 3 {
				dbg.Lvl4("Killing report:")
				cliutils.SshRunStdout("", h, "ps ax")
			}
			doneHosts[i] = true
			dbg.Lvl3("Host", h, "cleaned up")
		}(i, h)
	}

	cleanupChannel := make(chan string)
	go func() {
		wg.Wait()
		dbg.Lvl3("Done waiting")
		cleanupChannel <- "done"
	}()
	select {
	case msg := <-cleanupChannel:
		dbg.Lvl3("Received msg from cleanupChannel", msg)
	case <-time.After(time.Second * 10):
		for i, m := range doneHosts {
			if !m {
				dbg.Lvl1("Missing host:", phys[i])
			}
		}
		dbg.Fatal("Didn't receive all replies.")
	}

	if kill {
		dbg.Lvl1("Returning only from cleanup")
		return
	}

	/*
		 * Why copy the stuff to the other nodes? We have NFS, no?
		for _, h := range phys {
			wg.Add(1)
			go func(h string) {
				defer wg.Done()
				cliutils.Rsync("", h, "remote", "")
			}(h)
		}
		wg.Wait()
	*/

	nloggers := conf.Nloggers
	masterLogger := phys[0]
	loggers := []string{masterLogger}
	dbg.Lvl3("Going to create", nloggers, "loggers")
	for n := 1; n < nloggers; n++ {
		loggers = append(loggers, phys[n])
	}

	phys = phys[nloggers:]
	virt = virt[nloggers:]

	// Read in and parse the configuration file
	file, err := ioutil.ReadFile("remote/tree.json")
	if err != nil {
		log.Fatal("deter.go: error reading configuration file: %v\n", err)
	}
	dbg.Lvl4("cfg file:", string(file))
	var cf config.ConfigFile
	err = json.Unmarshal(file, &cf)
	if err != nil {
		log.Fatal("unable to unmarshal config.ConfigFile:", err)
	}

	hostnames := cf.Hosts
	dbg.Lvl4("hostnames:", hostnames)

	depth := graphs.Depth(cf.Tree)
	var random_leaf string
	cf.Tree.TraverseTree(func(t *graphs.Tree) {
		if random_leaf != "" {
			return
		}
		if len(t.Children) == 0 {
			random_leaf = t.Name
		}
	})

	rootname = hostnames[0]

	dbg.Lvl4("depth of tree:", depth)

	// mapping from physical node name to the timestamp servers that are running there
	// essentially a reverse mapping of vpmap except ports are also used
	physToServer := make(map[string][]string)
	for _, virt := range hostnames {
		v, _, _ := net.SplitHostPort(virt)
		p := vpmap[v]
		ss := physToServer[p]
		ss = append(ss, virt)
		physToServer[p] = ss
	}

	// start up the logging server on the final host at port 10000
	dbg.Lvl1("starting up logservers: ", loggers)
	// start up the master logger
	loggerports := make([]string, len(loggers))
	for i, logger := range loggers {
		loggerport := logger + ":10000"
		loggerports[i] = loggerport
		// redirect to the master logger
		master := masterLogger + ":10000"
		// if this is the master logger than don't set the master to anything
		if loggerport == masterLogger+":10000" {
			master = ""
		}

		// Copy configuration file to make higher file-limits
		err = cliutils.SshRunStdout("", logger, "sudo cp remote/cothority.conf /etc/security/limits.d")

		if err != nil {
			log.Fatal("Couldn't copy limit-file:", err)
		}

		go cliutils.SshRunStdout("", logger, "cd remote; sudo ./logserver -addr="+loggerport+
			" -master="+master)
	}

	i := 0
	// For coll_stamp we have to wait for everything in place which takes quite some time
	// We set up a directory and every host writes a file once he's ready to listen
	// When everybody is ready, the directory is deleted and the test starts
	coll_stamp_dir := "remote/coll_stamp_up"
	if conf.App == "coll_stamp" || conf.App == "coll_sign" {
		os.RemoveAll(coll_stamp_dir)
		os.MkdirAll(coll_stamp_dir, 0777)
		time.Sleep(time.Second)
	}
	dbg.Lvl1("starting", len(physToServer), "forkexecs")
	totalServers := 0
	for phys, virts := range physToServer {
		if len(virts) == 0 {
			continue
		}
		totalServers += len(virts)
		dbg.Lvl1("Launching forkexec for", len(virts), "clients on", phys)
		//cmd := GenExecCmd(phys, virts, loggerports[i], random_leaf)
		i = (i + 1) % len(loggerports)
		wg.Add(1)
		go func(phys string) {
			//dbg.Lvl4("running on ", phys, cmd)
			defer wg.Done()
			dbg.Lvl4("Starting servers on physical machine ", phys)
			err := cliutils.SshRunStdout("", phys, "cd remote; sudo ./forkexec"+
				" -physaddr="+phys+" -logger="+loggerports[i])
			if err != nil {
				log.Fatal("Error starting timestamper:", err, phys)
			}
			dbg.Lvl4("Finished with Timestamper", phys)
		}(phys)
	}

	if conf.App == "coll_stamp" || conf.App == "coll_sign" {
		// Every stampserver that started up (mostly waiting for configuration-reading)
		// writes its name in coll_stamp_dir - once everybody is there, the directory
		// is cleaned to flag it's OK to go on.
		start_config := time.Now()
		for {
			files, err := ioutil.ReadDir(coll_stamp_dir)
			if err != nil {
				log.Fatal("Couldn't read directory", coll_stamp_dir, err)
			} else {
				dbg.Lvl1("Stampservers started:", len(files), "/", totalServers, "after", time.Since(start_config))
				if len(files) == totalServers {
					os.RemoveAll(coll_stamp_dir)
					// 1st second for everybody to see the deleted directory
					// 2nd second for everybody to start up listening
					time.Sleep(2 * time.Second)
					break
				}
			}
			time.Sleep(time.Second)
		}
	}

	switch conf.App {
	case "coll_stamp":
		dbg.Lvl1("starting", len(physToServer), "time clients")
		// start up one timeclient per physical machine
		// it requests timestamps from all the servers on that machine
		for p, ss := range physToServer {
			if len(ss) == 0 {
				continue
			}
			servers := strings.Join(ss, ",")
			go func(i int, p string) {
				_, err := cliutils.SshRun("", p, "cd remote; sudo ./app -mode=client -app="+conf.App+
					" -name=client@"+p+
					" -server="+servers+
					" -logger="+loggerports[i])
				if err != nil {
					dbg.Lvl4("Deter.go : timeclient error ", err)
				}
				dbg.Lvl4("Deter.go : Finished with timeclient", p)
			}(i, p)
			i = (i + 1) % len(loggerports)
		}
	case "coll_sign_no":
		// TODO: for now it's only a simple startup from the server
		dbg.Lvl1("Starting only one client")
		/*
			p := physToServer[0][0]
			servers := strings.Join(physToServer[0][1], ",")
			_, err = cliutils.SshRun("", p, "cd remote; sudo ./app -mode=client -app=" + conf.App +
			" -name=client@" + p +
			" -server=" + servers +
			" -logger=" + loggerports[i])
			i = (i + 1) % len(loggerports)
		*/
	}

	// wait for the servers to finish before stopping
	wg.Wait()
	//time.Sleep(10 * time.Minute)
}
Beispiel #6
0
func main() {
	deterlab.ReadConfig()
	flag.Parse()

	vpmap := make(map[string]string)
	for i := range deterlab.Virt {
		vpmap[deterlab.Virt[i]] = deterlab.Phys[i]
	}
	// kill old processes
	var wg sync.WaitGroup
	re := regexp.MustCompile(" +")
	hosts, err := exec.Command("/usr/testbed/bin/node_list", "-e", deterlab.Project+","+deterlab.Experiment).Output()
	if err != nil {
		dbg.Fatal("Deterlab experiment", deterlab.Project+"/"+deterlab.Experiment, "seems not to be swapped in. Aborting.")
		os.Exit(-1)
	}
	hosts_trimmed := strings.TrimSpace(re.ReplaceAllString(string(hosts), " "))
	hostlist := strings.Split(hosts_trimmed, " ")
	doneHosts := make([]bool, len(hostlist))
	dbg.Lvl2("Found the following hosts:", hostlist)
	if kill {
		dbg.Lvl1("Cleaning up", len(hostlist), "hosts.")
	}
	for i, h := range hostlist {
		wg.Add(1)
		go func(i int, h string) {
			defer wg.Done()
			if kill {
				dbg.Lvl4("Cleaning up host", h, ".")
				cliutils.SshRun("", h, "sudo killall -9 "+deterlab.App+" logserver forkexec timeclient scp 2>/dev/null >/dev/null")
				time.Sleep(1 * time.Second)
				cliutils.SshRun("", h, "sudo killall -9 "+deterlab.App+" 2>/dev/null >/dev/null")
				time.Sleep(1 * time.Second)
				// Also kill all other process that start with "./" and are probably
				// locally started processes
				cliutils.SshRun("", h, "sudo pkill -9 -f '\\./'")
				time.Sleep(1 * time.Second)
				if dbg.DebugVisible > 3 {
					dbg.Lvl4("Cleaning report:")
					cliutils.SshRunStdout("", h, "ps aux")
				}
			} else {
				dbg.Lvl3("Setting the file-limit higher on", h)

				// Copy configuration file to make higher file-limits
				err := cliutils.SshRunStdout("", h, "sudo cp remote/cothority.conf /etc/security/limits.d")
				if err != nil {
					dbg.Fatal("Couldn't copy limit-file:", err)
				}
			}
			doneHosts[i] = true
			dbg.Lvl3("Host", h, "cleaned up")
		}(i, h)
	}

	cleanupChannel := make(chan string)
	go func() {
		wg.Wait()
		dbg.Lvl3("Done waiting")
		cleanupChannel <- "done"
	}()
	select {
	case msg := <-cleanupChannel:
		dbg.Lvl3("Received msg from cleanupChannel", msg)
	case <-time.After(time.Second * 20):
		for i, m := range doneHosts {
			if !m {
				dbg.Lvl1("Missing host:", hostlist[i], "- You should run")
				dbg.Lvl1("/usr/testbed/bin/node_reboot", hostlist[i])
			}
		}
		dbg.Fatal("Didn't receive all replies while cleaning up - aborting.")
	}

	if kill {
		dbg.Lvl2("Only cleaning up - returning")
		return
	}

	// ADDITIONS : the monitoring part
	// Proxy will listen on Sink:SinkPort and redirect every packet to
	// RedirectionAddress:RedirectionPort. With remote tunnel forwarding it will
	// be forwarded to the real sink
	dbg.Lvl2("Launching proxy redirecting to", deterlab.ProxyRedirectionAddress, ":", deterlab.ProxyRedirectionPort)
	go monitor.Proxy(deterlab.ProxyRedirectionAddress + ":" + deterlab.ProxyRedirectionPort)
	time.Sleep(time.Second)

	hostnames := deterlab.Hostnames
	dbg.Lvl4("hostnames:", hostnames)

	// mapping from physical node name to the timestamp servers that are running there
	// essentially a reverse mapping of vpmap except ports are also used
	physToServer := make(map[string][]string)
	for _, virt := range hostnames {
		v, _, _ := net.SplitHostPort(virt)
		p := vpmap[v]
		ss := physToServer[p]
		ss = append(ss, virt)
		physToServer[p] = ss
	}

	monitorAddr := deterlab.MonitorAddress + ":" + monitor.SinkPort
	servers := len(physToServer)
	ppm := len(deterlab.Hostnames) / servers
	dbg.Lvl1("starting", servers, "forkexecs with", ppm, "processes each =", servers*ppm)
	totalServers := 0
	for phys, virts := range physToServer {
		if len(virts) == 0 {
			continue
		}
		totalServers += len(virts)
		dbg.Lvl2("Launching forkexec for", len(virts), "clients on", phys)
		wg.Add(1)
		go func(phys string) {
			//dbg.Lvl4("running on", phys, cmd)
			defer wg.Done()
			dbg.Lvl4("Starting servers on physical machine", phys, "with logger =", deterlab.MonitorAddress+":"+monitor.SinkPort)
			err := cliutils.SshRunStdout("", phys, "cd remote; sudo ./forkexec"+
				" -physaddr="+phys+" -logger="+deterlab.MonitorAddress+":"+monitor.SinkPort)
			if err != nil {
				dbg.Lvl1("Error starting timestamper:", err, phys)
			}
			dbg.Lvl4("Finished with Timestamper", phys)
		}(phys)
	}

	if deterlab.App == "stamp" || deterlab.App == "sign" {
		// Every stampserver that started up (mostly waiting for configuration-reading)
		// writes its name in coll_stamp_dir - once everybody is there, the directory
		// is cleaned to flag it's OK to go on.
		start_config := time.Now()
		for {
			s, err := monitor.GetReady(monitorAddr)
			if err != nil {
				log.Fatal("Couldn't contact monitor")
			} else {
				dbg.Lvl1("Processes started:", s.Ready, "/", totalServers, "after", time.Since(start_config))
				if s.Ready == totalServers {
					dbg.Lvl2("Everybody ready, starting")
					// 1st second for everybody to see the deleted directory
					// 2nd second for everybody to start up listening
					time.Sleep(time.Second * 2)
					break
				}
			}
			time.Sleep(time.Second)
		}
	}

	switch deterlab.App {
	case "stamp":
		dbg.Lvl1("starting", len(physToServer), "time clients")
		// start up one timeclient per physical machine
		// it requests timestamps from all the servers on that machine
		amroot := true
		for p, ss := range physToServer {
			if len(ss) == 0 {
				dbg.Lvl3("ss is empty - not starting")
				continue
			}
			servers := strings.Join(ss, ",")
			dbg.Lvl3("Starting with ss=", ss)
			go func(p string, a bool) {
				cmdstr := "cd remote; sudo ./" + deterlab.App + " -mode=client " +
					" -name=client@" + p +
					" -server=" + servers +
					" -amroot=" + strconv.FormatBool(a)
				dbg.Lvl3("Users will launch client:", cmdstr)
				err := cliutils.SshRunStdout("", p, cmdstr)
				if err != nil {
					dbg.Lvl4("Deter.go: error for", deterlab.App, err)
				}
				dbg.Lvl4("Deter.go: Finished with", deterlab.App, p)
			}(p, amroot)
			amroot = false
		}
	case "sign_no":
		// TODO: for now it's only a simple startup from the server
		dbg.Lvl1("Starting only one client")
	}

	// wait for the servers to finish before stopping
	wg.Wait()
}