// Kills all eventually remaining processes from the last Deploy-run func (d *Deterlab) Cleanup() error { // Cleanup eventual ssh from the proxy-forwarding to the logserver //err := exec.Command("kill", "-9", "$(ps x | grep ssh | grep nNTf | cut -d' ' -f1)").Run() err := exec.Command("pkill", "-9", "-f", "ssh -nNTf").Run() if err != nil { dbg.Lvl3("Error stopping ssh:", err) } // SSH to the deterlab-server and end all running users-processes dbg.Lvl3("Going to kill everything") var sshKill chan string sshKill = make(chan string) go func() { // Cleanup eventual residues of previous round - users and sshd cliutils.SshRun(d.Login, d.Host, "killall -9 users sshd") err = cliutils.SshRunStdout(d.Login, d.Host, "test -f remote/users && ( cd remote; ./users -kill )") if err != nil { dbg.Lvl1("NOT-Normal error from cleanup") sshKill <- "error" } sshKill <- "stopped" }() for { select { case msg := <-sshKill: if msg == "stopped" { dbg.Lvl3("Users stopped") return nil } else { dbg.Lvl2("Received other command", msg, "probably the app didn't quit correctly") } case <-time.After(time.Second * 20): dbg.Lvl3("Timeout error when waiting for end of ssh") return nil } } return nil }
func main() { deter, err := deploy.ReadConfig("remote") if err != nil { log.Fatal("Couldn't read config in deter:", err) } conf = deter.Config dbg.DebugVisible = conf.Debug dbg.Lvl1("running deter with nmsgs:", conf.Nmsgs, "rate:", conf.Rate, "rounds:", conf.Rounds, "debug:", conf.Debug) virt, err := cliutils.ReadLines("remote/virt.txt") if err != nil { log.Fatal(err) } phys, err := cliutils.ReadLines("remote/phys.txt") if err != nil { log.Fatal(err) } vpmap := make(map[string]string) for i := range virt { vpmap[virt[i]] = phys[i] } // kill old processes var wg sync.WaitGroup doneHosts := make([]bool, len(phys)) for i, h := range phys { wg.Add(1) go func(i int, h string) { defer wg.Done() dbg.Lvl4("Cleaning up host", h) cliutils.SshRun("", h, "sudo killall app forkexec logserver timeclient scp ssh 2>/dev/null >/dev/null") time.Sleep(1 * time.Second) cliutils.SshRun("", h, "sudo killall app 2>/dev/null >/dev/null") if dbg.DebugVisible > 3 { dbg.Lvl4("Killing report:") cliutils.SshRunStdout("", h, "ps ax") } doneHosts[i] = true dbg.Lvl3("Host", h, "cleaned up") }(i, h) } cleanupChannel := make(chan string) go func() { wg.Wait() dbg.Lvl3("Done waiting") cleanupChannel <- "done" }() select { case msg := <-cleanupChannel: dbg.Lvl3("Received msg from cleanupChannel", msg) case <-time.After(time.Second * 10): for i, m := range doneHosts { if !m { dbg.Lvl1("Missing host:", phys[i]) } } dbg.Fatal("Didn't receive all replies.") } if kill { dbg.Lvl1("Returning only from cleanup") return } /* * Why copy the stuff to the other nodes? We have NFS, no? for _, h := range phys { wg.Add(1) go func(h string) { defer wg.Done() cliutils.Rsync("", h, "remote", "") }(h) } wg.Wait() */ nloggers := conf.Nloggers masterLogger := phys[0] loggers := []string{masterLogger} dbg.Lvl3("Going to create", nloggers, "loggers") for n := 1; n < nloggers; n++ { loggers = append(loggers, phys[n]) } phys = phys[nloggers:] virt = virt[nloggers:] // Read in and parse the configuration file file, err := ioutil.ReadFile("remote/tree.json") if err != nil { log.Fatal("deter.go: error reading configuration file: %v\n", err) } dbg.Lvl4("cfg file:", string(file)) var cf config.ConfigFile err = json.Unmarshal(file, &cf) if err != nil { log.Fatal("unable to unmarshal config.ConfigFile:", err) } hostnames := cf.Hosts dbg.Lvl4("hostnames:", hostnames) depth := graphs.Depth(cf.Tree) var random_leaf string cf.Tree.TraverseTree(func(t *graphs.Tree) { if random_leaf != "" { return } if len(t.Children) == 0 { random_leaf = t.Name } }) rootname = hostnames[0] dbg.Lvl4("depth of tree:", depth) // mapping from physical node name to the timestamp servers that are running there // essentially a reverse mapping of vpmap except ports are also used physToServer := make(map[string][]string) for _, virt := range hostnames { v, _, _ := net.SplitHostPort(virt) p := vpmap[v] ss := physToServer[p] ss = append(ss, virt) physToServer[p] = ss } // start up the logging server on the final host at port 10000 dbg.Lvl1("starting up logservers: ", loggers) // start up the master logger loggerports := make([]string, len(loggers)) for i, logger := range loggers { loggerport := logger + ":10000" loggerports[i] = loggerport // redirect to the master logger master := masterLogger + ":10000" // if this is the master logger than don't set the master to anything if loggerport == masterLogger+":10000" { master = "" } // Copy configuration file to make higher file-limits err = cliutils.SshRunStdout("", logger, "sudo cp remote/cothority.conf /etc/security/limits.d") if err != nil { log.Fatal("Couldn't copy limit-file:", err) } go cliutils.SshRunStdout("", logger, "cd remote; sudo ./logserver -addr="+loggerport+ " -master="+master) } i := 0 // For coll_stamp we have to wait for everything in place which takes quite some time // We set up a directory and every host writes a file once he's ready to listen // When everybody is ready, the directory is deleted and the test starts coll_stamp_dir := "remote/coll_stamp_up" if conf.App == "coll_stamp" || conf.App == "coll_sign" { os.RemoveAll(coll_stamp_dir) os.MkdirAll(coll_stamp_dir, 0777) time.Sleep(time.Second) } dbg.Lvl1("starting", len(physToServer), "forkexecs") totalServers := 0 for phys, virts := range physToServer { if len(virts) == 0 { continue } totalServers += len(virts) dbg.Lvl1("Launching forkexec for", len(virts), "clients on", phys) //cmd := GenExecCmd(phys, virts, loggerports[i], random_leaf) i = (i + 1) % len(loggerports) wg.Add(1) go func(phys string) { //dbg.Lvl4("running on ", phys, cmd) defer wg.Done() dbg.Lvl4("Starting servers on physical machine ", phys) err := cliutils.SshRunStdout("", phys, "cd remote; sudo ./forkexec"+ " -physaddr="+phys+" -logger="+loggerports[i]) if err != nil { log.Fatal("Error starting timestamper:", err, phys) } dbg.Lvl4("Finished with Timestamper", phys) }(phys) } if conf.App == "coll_stamp" || conf.App == "coll_sign" { // Every stampserver that started up (mostly waiting for configuration-reading) // writes its name in coll_stamp_dir - once everybody is there, the directory // is cleaned to flag it's OK to go on. start_config := time.Now() for { files, err := ioutil.ReadDir(coll_stamp_dir) if err != nil { log.Fatal("Couldn't read directory", coll_stamp_dir, err) } else { dbg.Lvl1("Stampservers started:", len(files), "/", totalServers, "after", time.Since(start_config)) if len(files) == totalServers { os.RemoveAll(coll_stamp_dir) // 1st second for everybody to see the deleted directory // 2nd second for everybody to start up listening time.Sleep(2 * time.Second) break } } time.Sleep(time.Second) } } switch conf.App { case "coll_stamp": dbg.Lvl1("starting", len(physToServer), "time clients") // start up one timeclient per physical machine // it requests timestamps from all the servers on that machine for p, ss := range physToServer { if len(ss) == 0 { continue } servers := strings.Join(ss, ",") go func(i int, p string) { _, err := cliutils.SshRun("", p, "cd remote; sudo ./app -mode=client -app="+conf.App+ " -name=client@"+p+ " -server="+servers+ " -logger="+loggerports[i]) if err != nil { dbg.Lvl4("Deter.go : timeclient error ", err) } dbg.Lvl4("Deter.go : Finished with timeclient", p) }(i, p) i = (i + 1) % len(loggerports) } case "coll_sign_no": // TODO: for now it's only a simple startup from the server dbg.Lvl1("Starting only one client") /* p := physToServer[0][0] servers := strings.Join(physToServer[0][1], ",") _, err = cliutils.SshRun("", p, "cd remote; sudo ./app -mode=client -app=" + conf.App + " -name=client@" + p + " -server=" + servers + " -logger=" + loggerports[i]) i = (i + 1) % len(loggerports) */ } // wait for the servers to finish before stopping wg.Wait() //time.Sleep(10 * time.Minute) }
func main() { deterlab.ReadConfig() flag.Parse() vpmap := make(map[string]string) for i := range deterlab.Virt { vpmap[deterlab.Virt[i]] = deterlab.Phys[i] } // kill old processes var wg sync.WaitGroup re := regexp.MustCompile(" +") hosts, err := exec.Command("/usr/testbed/bin/node_list", "-e", deterlab.Project+","+deterlab.Experiment).Output() if err != nil { dbg.Fatal("Deterlab experiment", deterlab.Project+"/"+deterlab.Experiment, "seems not to be swapped in. Aborting.") os.Exit(-1) } hosts_trimmed := strings.TrimSpace(re.ReplaceAllString(string(hosts), " ")) hostlist := strings.Split(hosts_trimmed, " ") doneHosts := make([]bool, len(hostlist)) dbg.Lvl2("Found the following hosts:", hostlist) if kill { dbg.Lvl1("Cleaning up", len(hostlist), "hosts.") } for i, h := range hostlist { wg.Add(1) go func(i int, h string) { defer wg.Done() if kill { dbg.Lvl4("Cleaning up host", h, ".") cliutils.SshRun("", h, "sudo killall -9 "+deterlab.App+" logserver forkexec timeclient scp 2>/dev/null >/dev/null") time.Sleep(1 * time.Second) cliutils.SshRun("", h, "sudo killall -9 "+deterlab.App+" 2>/dev/null >/dev/null") time.Sleep(1 * time.Second) // Also kill all other process that start with "./" and are probably // locally started processes cliutils.SshRun("", h, "sudo pkill -9 -f '\\./'") time.Sleep(1 * time.Second) if dbg.DebugVisible > 3 { dbg.Lvl4("Cleaning report:") cliutils.SshRunStdout("", h, "ps aux") } } else { dbg.Lvl3("Setting the file-limit higher on", h) // Copy configuration file to make higher file-limits err := cliutils.SshRunStdout("", h, "sudo cp remote/cothority.conf /etc/security/limits.d") if err != nil { dbg.Fatal("Couldn't copy limit-file:", err) } } doneHosts[i] = true dbg.Lvl3("Host", h, "cleaned up") }(i, h) } cleanupChannel := make(chan string) go func() { wg.Wait() dbg.Lvl3("Done waiting") cleanupChannel <- "done" }() select { case msg := <-cleanupChannel: dbg.Lvl3("Received msg from cleanupChannel", msg) case <-time.After(time.Second * 20): for i, m := range doneHosts { if !m { dbg.Lvl1("Missing host:", hostlist[i], "- You should run") dbg.Lvl1("/usr/testbed/bin/node_reboot", hostlist[i]) } } dbg.Fatal("Didn't receive all replies while cleaning up - aborting.") } if kill { dbg.Lvl2("Only cleaning up - returning") return } // ADDITIONS : the monitoring part // Proxy will listen on Sink:SinkPort and redirect every packet to // RedirectionAddress:RedirectionPort. With remote tunnel forwarding it will // be forwarded to the real sink dbg.Lvl2("Launching proxy redirecting to", deterlab.ProxyRedirectionAddress, ":", deterlab.ProxyRedirectionPort) go monitor.Proxy(deterlab.ProxyRedirectionAddress + ":" + deterlab.ProxyRedirectionPort) time.Sleep(time.Second) hostnames := deterlab.Hostnames dbg.Lvl4("hostnames:", hostnames) // mapping from physical node name to the timestamp servers that are running there // essentially a reverse mapping of vpmap except ports are also used physToServer := make(map[string][]string) for _, virt := range hostnames { v, _, _ := net.SplitHostPort(virt) p := vpmap[v] ss := physToServer[p] ss = append(ss, virt) physToServer[p] = ss } monitorAddr := deterlab.MonitorAddress + ":" + monitor.SinkPort servers := len(physToServer) ppm := len(deterlab.Hostnames) / servers dbg.Lvl1("starting", servers, "forkexecs with", ppm, "processes each =", servers*ppm) totalServers := 0 for phys, virts := range physToServer { if len(virts) == 0 { continue } totalServers += len(virts) dbg.Lvl2("Launching forkexec for", len(virts), "clients on", phys) wg.Add(1) go func(phys string) { //dbg.Lvl4("running on", phys, cmd) defer wg.Done() dbg.Lvl4("Starting servers on physical machine", phys, "with logger =", deterlab.MonitorAddress+":"+monitor.SinkPort) err := cliutils.SshRunStdout("", phys, "cd remote; sudo ./forkexec"+ " -physaddr="+phys+" -logger="+deterlab.MonitorAddress+":"+monitor.SinkPort) if err != nil { dbg.Lvl1("Error starting timestamper:", err, phys) } dbg.Lvl4("Finished with Timestamper", phys) }(phys) } if deterlab.App == "stamp" || deterlab.App == "sign" { // Every stampserver that started up (mostly waiting for configuration-reading) // writes its name in coll_stamp_dir - once everybody is there, the directory // is cleaned to flag it's OK to go on. start_config := time.Now() for { s, err := monitor.GetReady(monitorAddr) if err != nil { log.Fatal("Couldn't contact monitor") } else { dbg.Lvl1("Processes started:", s.Ready, "/", totalServers, "after", time.Since(start_config)) if s.Ready == totalServers { dbg.Lvl2("Everybody ready, starting") // 1st second for everybody to see the deleted directory // 2nd second for everybody to start up listening time.Sleep(time.Second * 2) break } } time.Sleep(time.Second) } } switch deterlab.App { case "stamp": dbg.Lvl1("starting", len(physToServer), "time clients") // start up one timeclient per physical machine // it requests timestamps from all the servers on that machine amroot := true for p, ss := range physToServer { if len(ss) == 0 { dbg.Lvl3("ss is empty - not starting") continue } servers := strings.Join(ss, ",") dbg.Lvl3("Starting with ss=", ss) go func(p string, a bool) { cmdstr := "cd remote; sudo ./" + deterlab.App + " -mode=client " + " -name=client@" + p + " -server=" + servers + " -amroot=" + strconv.FormatBool(a) dbg.Lvl3("Users will launch client:", cmdstr) err := cliutils.SshRunStdout("", p, cmdstr) if err != nil { dbg.Lvl4("Deter.go: error for", deterlab.App, err) } dbg.Lvl4("Deter.go: Finished with", deterlab.App, p) }(p, amroot) amroot = false } case "sign_no": // TODO: for now it's only a simple startup from the server dbg.Lvl1("Starting only one client") } // wait for the servers to finish before stopping wg.Wait() }