func (d *Deterlab) Start(args ...string) error {
	// setup port forwarding for viewing log server
	d.started = true
	// Remote tunneling: the sink port is used both for the sink and for the
	// proxy => the proxy redirects packets to the same port the sink is
	// listening on.
	// -n => stdin from /dev/null, -N => no remote command, -T => no tty,
	// -f => go to background once the forwarding is established
	cmd := []string{"-nNTf", "-o", "StrictHostKeyChecking=no", "-o", "ExitOnForwardFailure=yes", "-R",
		d.ProxyRedirectionPort + ":" + d.ProxyRedirectionAddress + ":" + monitor.SinkPort,
		fmt.Sprintf("%s@%s", d.Login, d.Host)}
	exCmd := exec.Command("ssh", cmd...)
	if err := exCmd.Start(); err != nil {
		dbg.Fatal("Failed to start the ssh port forwarding:", err)
	}
	// with -f, ssh backgrounds itself once the tunnel is up, so Wait returns quickly
	if err := exCmd.Wait(); err != nil {
		dbg.Fatal("ssh port forwarding exited in failure:", err)
	}
	dbg.Lvl3("Setup remote port forwarding", cmd)
	go func() {
		err := cliutils.SshRunStdout(d.Login, d.Host, "cd remote; GOMAXPROCS=8 ./users")
		if err != nil {
			dbg.Lvl3(err)
		}
		d.sshDeter <- "finished"
	}()

	return nil
}
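// For reference, the tunnel built above corresponds to the following manual
// invocation (placeholder values for the configured fields):
//
//   ssh -nNTf -o StrictHostKeyChecking=no -o ExitOnForwardFailure=yes \
//       -R <ProxyRedirectionPort>:<ProxyRedirectionAddress>:<SinkPort> login@host
//
// A minimal usage sketch, assuming a configured *Deterlab value d - the
// sshDeter channel receives "finished" once the remote ./users process exits:
//
//   if err := d.Start(); err != nil {
//       dbg.Fatal("Couldn't start the deployment:", err)
//   }
//   <-d.sshDeter // block until the remote run is done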
func (d *Deter) Start() error {
	dbg.Lvl1("Running with", d.Config.Nmachs, "nodes *", d.Config.Hpn, "hosts per node =",
		d.Config.Nmachs*d.Config.Hpn, "and", d.Config.Nloggers, "loggers")

	// setup port forwarding for viewing log server
	dbg.Lvl3("setup port forwarding for master logger:", d.masterLogger, d.Login, d.Host)
	cmd := exec.Command(
		"ssh",
		"-t",
		"-t",
		fmt.Sprintf("%s@%s", d.Login, d.Host),
		"-L",
		"8081:"+d.masterLogger+":10000")
	err := cmd.Start()
	if err != nil {
		log.Fatal("failed to setup port forwarding for logging server")
	}
	dbg.Lvl3("running deter with nmsgs:", d.Config.Nmsgs, d.Login, d.Host)

	// run the deterlab boss-node process; it is responsible for forwarding the
	// files and running the individual timestamping servers
	go func() {
		dbg.Lvl3(cliutils.SshRunStdout(d.Login, d.Host,
			"GOMAXPROCS=8 remote/deter -nmsgs="+strconv.Itoa(d.Config.Nmsgs)+
				" -hpn="+strconv.Itoa(d.Config.Hpn)+
				" -bf="+strconv.Itoa(d.Config.Bf)+
				" -rate="+strconv.Itoa(d.Config.Rate)+
				" -rounds="+strconv.Itoa(d.Config.Rounds)+
				" -debug="+strconv.Itoa(d.Config.Debug)+
				" -failures="+strconv.Itoa(d.Config.Failures)+
				" -rfail="+strconv.Itoa(d.Config.RFail)+
				" -ffail="+strconv.Itoa(d.Config.FFail)+
				" -app="+d.Config.App+
				" -suite="+d.Config.Suite))
		dbg.Lvl3("Sending stop of ssh")
		d.sshDeter <- "stop"
	}()

	return nil
}
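// The flag-string above could also be assembled from a slice, which is easier
// to extend and keeps each flag on its own line; a sketch only, using the same
// config fields as above:
//
//   flags := []string{
//       "-nmsgs=" + strconv.Itoa(d.Config.Nmsgs),
//       "-hpn=" + strconv.Itoa(d.Config.Hpn),
//       "-bf=" + strconv.Itoa(d.Config.Bf),
//       // ... remaining flags analogous ...
//       "-app=" + d.Config.App,
//   }
//   cmdStr := "GOMAXPROCS=8 remote/deter " + strings.Join(flags, " ")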
func (d *Deter) Deploy() error {
	dbg.Lvl1("Assembling all files and configuration options")
	os.RemoveAll(d.DeployDir)
	os.Mkdir(d.DeployDir, 0777)

	dbg.Lvl1("Writing config-files")
	d.generateHostsFile()
	d.readHosts()
	d.calculateGraph()
	d.WriteConfig()

	// copy the webfile-directory of the logserver to the deploy-directory
	err := exec.Command("cp", "-a", d.DeterDir+"/logserver/webfiles",
		d.DeterDir+"/cothority.conf", d.DeployDir).Run()
	if err != nil {
		log.Fatal("error copying webfiles:", err)
	}
	build, err := ioutil.ReadDir(d.BuildDir)
	if err != nil {
		log.Fatal("error reading build-directory:", err)
	}
	for _, file := range build {
		err = exec.Command("cp", d.BuildDir+"/"+file.Name(), d.DeployDir).Run()
		if err != nil {
			log.Fatal("error copying build-file:", err)
		}
	}

	dbg.Lvl1("Copying over to", d.Login, "@", d.Host)
	// Copy everything over to Deterlab
	err = cliutils.Rsync(d.Login, d.Host, d.DeployDir+"/", "remote/")
	if err != nil {
		log.Fatal(err)
	}
	dbg.Lvl1("Done copying")
	dbg.Lvl3(cliutils.SshRunStdout(d.Login, d.Host, ""))

	return nil
}
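// Note the trailing slash in d.DeployDir+"/": rsync then copies the *contents*
// of the deploy-directory into "remote/" instead of creating a nested
// directory. A sketch of the underlying transfer, assuming cliutils.Rsync
// wraps the rsync binary:
//
//   rsync -a <DeployDir>/ <Login>@<Host>:remote/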
// Kills all processes that might still be running from the last Deploy-run
func (d *Deterlab) Cleanup() error {
	// Cleanup an eventual leftover ssh proxy-forwarding to the logserver
	err := exec.Command("pkill", "-9", "-f", "ssh -nNTf").Run()
	if err != nil {
		dbg.Lvl3("Error stopping ssh:", err)
	}

	// SSH to the deterlab-server and end all running users-processes
	dbg.Lvl3("Going to kill everything")
	sshKill := make(chan string)
	go func() {
		// Cleanup eventual residues of previous round - users and sshd
		cliutils.SshRun(d.Login, d.Host, "killall -9 users sshd")
		err = cliutils.SshRunStdout(d.Login, d.Host,
			"test -f remote/users && ( cd remote; ./users -kill )")
		if err != nil {
			dbg.Lvl1("Non-normal error from cleanup")
			sshKill <- "error"
		}
		// also send "stopped" so the waiting loop below returns promptly
		sshKill <- "stopped"
	}()

	for {
		select {
		case msg := <-sshKill:
			if msg == "stopped" {
				dbg.Lvl3("Users stopped")
				return nil
			}
			dbg.Lvl2("Received other command", msg, "- probably the app didn't quit correctly")
		case <-time.After(time.Second * 20):
			dbg.Lvl3("Timeout while waiting for end of ssh")
			return nil
		}
	}
}
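// The goroutine-plus-time.After construction above is a reusable pattern for
// bounding a blocking call. A minimal, self-contained sketch with a
// hypothetical helper (not part of this package; needs the "errors" import):
//
//   func runWithTimeout(f func() error, d time.Duration) error {
//       done := make(chan error, 1) // buffered, so the goroutine never leaks
//       go func() { done <- f() }()
//       select {
//       case err := <-done:
//           return err
//       case <-time.After(d):
//           return errors.New("timeout waiting for command")
//       }
//   }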
func main() {
	deter, err := deploy.ReadConfig("remote")
	if err != nil {
		log.Fatal("Couldn't read config in deter:", err)
	}
	conf = deter.Config
	dbg.DebugVisible = conf.Debug

	dbg.Lvl1("running deter with nmsgs:", conf.Nmsgs, "rate:", conf.Rate,
		"rounds:", conf.Rounds, "debug:", conf.Debug)

	virt, err := cliutils.ReadLines("remote/virt.txt")
	if err != nil {
		log.Fatal(err)
	}
	phys, err := cliutils.ReadLines("remote/phys.txt")
	if err != nil {
		log.Fatal(err)
	}
	vpmap := make(map[string]string)
	for i := range virt {
		vpmap[virt[i]] = phys[i]
	}

	// kill old processes
	var wg sync.WaitGroup
	doneHosts := make([]bool, len(phys))
	for i, h := range phys {
		wg.Add(1)
		go func(i int, h string) {
			defer wg.Done()
			dbg.Lvl4("Cleaning up host", h)
			cliutils.SshRun("", h, "sudo killall app forkexec logserver timeclient scp ssh 2>/dev/null >/dev/null")
			time.Sleep(1 * time.Second)
			cliutils.SshRun("", h, "sudo killall app 2>/dev/null >/dev/null")
			if dbg.DebugVisible > 3 {
				dbg.Lvl4("Killing report:")
				cliutils.SshRunStdout("", h, "ps ax")
			}
			doneHosts[i] = true
			dbg.Lvl3("Host", h, "cleaned up")
		}(i, h)
	}

	cleanupChannel := make(chan string)
	go func() {
		wg.Wait()
		dbg.Lvl3("Done waiting")
		cleanupChannel <- "done"
	}()
	select {
	case msg := <-cleanupChannel:
		dbg.Lvl3("Received msg from cleanupChannel", msg)
	case <-time.After(time.Second * 10):
		for i, m := range doneHosts {
			if !m {
				dbg.Lvl1("Missing host:", phys[i])
			}
		}
		dbg.Fatal("Didn't receive all replies.")
	}

	if kill {
		dbg.Lvl1("Returning only from cleanup")
		return
	}

	/*
	 * Why copy the stuff to the other nodes? We have NFS, no?
	for _, h := range phys {
		wg.Add(1)
		go func(h string) {
			defer wg.Done()
			cliutils.Rsync("", h, "remote", "")
		}(h)
	}
	wg.Wait()
	*/

	nloggers := conf.Nloggers
	masterLogger := phys[0]
	loggers := []string{masterLogger}
	dbg.Lvl3("Going to create", nloggers, "loggers")
	for n := 1; n < nloggers; n++ {
		loggers = append(loggers, phys[n])
	}

	phys = phys[nloggers:]
	virt = virt[nloggers:]

	// Read in and parse the configuration file
	file, err := ioutil.ReadFile("remote/tree.json")
	if err != nil {
		log.Fatal("deter.go: error reading configuration file:", err)
	}
	dbg.Lvl4("cfg file:", string(file))
	var cf config.ConfigFile
	err = json.Unmarshal(file, &cf)
	if err != nil {
		log.Fatal("unable to unmarshal config.ConfigFile:", err)
	}

	hostnames := cf.Hosts
	dbg.Lvl4("hostnames:", hostnames)

	depth := graphs.Depth(cf.Tree)
	var random_leaf string
	cf.Tree.TraverseTree(func(t *graphs.Tree) {
		if random_leaf != "" {
			return
		}
		if len(t.Children) == 0 {
			random_leaf = t.Name
		}
	})

	rootname = hostnames[0]

	dbg.Lvl4("depth of tree:", depth)

	// mapping from physical node name to the timestamp servers that are running there
	// essentially a reverse mapping of vpmap except ports are also used
	physToServer := make(map[string][]string)
	for _, virt := range hostnames {
		v, _, _ := net.SplitHostPort(virt)
		p := vpmap[v]
		ss := physToServer[p]
		ss = append(ss, virt)
		physToServer[p] = ss
	}

	// start up the logging servers at port 10000 on the first nloggers hosts;
	// the first one is the master logger
	dbg.Lvl1("starting up logservers:", loggers)
	loggerports := make([]string, len(loggers))
	for i, logger := range loggers {
		loggerport := logger + ":10000"
		loggerports[i] = loggerport
		// redirect to the master logger
		master := masterLogger + ":10000"
		// if this is the master logger then don't set the master to anything
		if loggerport == master {
			master = ""
		}
		// Copy configuration file to allow higher file-limits
		err = cliutils.SshRunStdout("", logger, "sudo cp remote/cothority.conf /etc/security/limits.d")
		if err != nil {
			log.Fatal("Couldn't copy limit-file:", err)
		}
		go cliutils.SshRunStdout("", logger, "cd remote; sudo ./logserver -addr="+loggerport+
			" -master="+master)
	}

	i := 0
	// For coll_stamp we have to wait for everything to be in place, which takes
	// quite some time. We set up a directory and every host writes a file once
	// it's ready to listen. When everybody is ready, the directory is deleted
	// and the test starts.
	coll_stamp_dir := "remote/coll_stamp_up"
	if conf.App == "coll_stamp" || conf.App == "coll_sign" {
		os.RemoveAll(coll_stamp_dir)
		os.MkdirAll(coll_stamp_dir, 0777)
		time.Sleep(time.Second)
	}

	dbg.Lvl1("starting", len(physToServer), "forkexecs")
	totalServers := 0
	for phys, virts := range physToServer {
		if len(virts) == 0 {
			continue
		}
		totalServers += len(virts)
		dbg.Lvl1("Launching forkexec for", len(virts), "clients on", phys)
		i = (i + 1) % len(loggerports)
		wg.Add(1)
		// pass the logger explicitly so the goroutine doesn't race on i
		go func(phys, logger string) {
			defer wg.Done()
			dbg.Lvl4("Starting servers on physical machine", phys)
			err := cliutils.SshRunStdout("", phys, "cd remote; sudo ./forkexec"+
				" -physaddr="+phys+" -logger="+logger)
			if err != nil {
				log.Fatal("Error starting timestamper:", err, phys)
			}
			dbg.Lvl4("Finished with Timestamper", phys)
		}(phys, loggerports[i])
	}

	if conf.App == "coll_stamp" || conf.App == "coll_sign" {
		// Every stampserver that started up (mostly waiting for configuration-reading)
		// writes its name in coll_stamp_dir - once everybody is there, the directory
		// is cleaned to flag it's OK to go on.
		start_config := time.Now()
		for {
			files, err := ioutil.ReadDir(coll_stamp_dir)
			if err != nil {
				log.Fatal("Couldn't read directory", coll_stamp_dir, err)
			} else {
				dbg.Lvl1("Stampservers started:", len(files), "/", totalServers, "after", time.Since(start_config))
				if len(files) == totalServers {
					os.RemoveAll(coll_stamp_dir)
					// 1st second for everybody to see the deleted directory
					// 2nd second for everybody to start up listening
					time.Sleep(2 * time.Second)
					break
				}
			}
			time.Sleep(time.Second)
		}
	}

	switch conf.App {
	case "coll_stamp":
		dbg.Lvl1("starting", len(physToServer), "time clients")
		// start up one timeclient per physical machine
		// it requests timestamps from all the servers on that machine
		for p, ss := range physToServer {
			if len(ss) == 0 {
				continue
			}
			servers := strings.Join(ss, ",")
			go func(i int, p string) {
				_, err := cliutils.SshRun("", p, "cd remote; sudo ./app -mode=client -app="+conf.App+
					" -name=client@"+p+
					" -server="+servers+
					" -logger="+loggerports[i])
				if err != nil {
					dbg.Lvl4("Deter.go: timeclient error", err)
				}
				dbg.Lvl4("Deter.go: Finished with timeclient", p)
			}(i, p)
			i = (i + 1) % len(loggerports)
		}
	case "coll_sign_no":
		// TODO: for now it's only a simple startup from the server
		dbg.Lvl1("Starting only one client")
		/*
			p := physToServer[0][0]
			servers := strings.Join(physToServer[0][1], ",")
			_, err = cliutils.SshRun("", p, "cd remote; sudo ./app -mode=client -app="+conf.App+
				" -name=client@"+p+
				" -server="+servers+
				" -logger="+loggerports[i])
			i = (i + 1) % len(loggerports)
		*/
	}

	// wait for the servers to finish before stopping
	wg.Wait()
}
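// The coll_stamp_dir handshake above is a file-based barrier over the shared
// filesystem: every stampserver creates a file named after itself once it
// listens, and this coordinator polls until the count matches totalServers.
// The server-side half is small; a sketch, assuming each server knows its
// unique hostname:
//
//   // on each stampserver, once it is ready to accept connections:
//   if f, err := os.Create("remote/coll_stamp_up/" + hostname); err == nil {
//       f.Close()
//   }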
func main() {
	deterlab.ReadConfig()
	flag.Parse()

	vpmap := make(map[string]string)
	for i := range deterlab.Virt {
		vpmap[deterlab.Virt[i]] = deterlab.Phys[i]
	}
	// kill old processes
	var wg sync.WaitGroup
	re := regexp.MustCompile(" +")
	hosts, err := exec.Command("/usr/testbed/bin/node_list", "-e",
		deterlab.Project+","+deterlab.Experiment).Output()
	if err != nil {
		dbg.Fatal("Deterlab experiment", deterlab.Project+"/"+deterlab.Experiment,
			"doesn't seem to be swapped in. Aborting.")
	}
	hosts_trimmed := strings.TrimSpace(re.ReplaceAllString(string(hosts), " "))
	hostlist := strings.Split(hosts_trimmed, " ")
	doneHosts := make([]bool, len(hostlist))
	dbg.Lvl2("Found the following hosts:", hostlist)
	if kill {
		dbg.Lvl1("Cleaning up", len(hostlist), "hosts.")
	}
	for i, h := range hostlist {
		wg.Add(1)
		go func(i int, h string) {
			defer wg.Done()
			if kill {
				dbg.Lvl4("Cleaning up host", h, ".")
				cliutils.SshRun("", h, "sudo killall -9 "+deterlab.App+" logserver forkexec timeclient scp 2>/dev/null >/dev/null")
				time.Sleep(1 * time.Second)
				cliutils.SshRun("", h, "sudo killall -9 "+deterlab.App+" 2>/dev/null >/dev/null")
				time.Sleep(1 * time.Second)
				// Also kill all other processes that start with "./" and are
				// probably locally started
				cliutils.SshRun("", h, "sudo pkill -9 -f '\\./'")
				time.Sleep(1 * time.Second)
				if dbg.DebugVisible > 3 {
					dbg.Lvl4("Cleaning report:")
					cliutils.SshRunStdout("", h, "ps aux")
				}
			} else {
				dbg.Lvl3("Setting the file-limit higher on", h)
				// Copy configuration file to allow higher file-limits
				err := cliutils.SshRunStdout("", h, "sudo cp remote/cothority.conf /etc/security/limits.d")
				if err != nil {
					dbg.Fatal("Couldn't copy limit-file:", err)
				}
			}
			doneHosts[i] = true
			dbg.Lvl3("Host", h, "cleaned up")
		}(i, h)
	}

	cleanupChannel := make(chan string)
	go func() {
		wg.Wait()
		dbg.Lvl3("Done waiting")
		cleanupChannel <- "done"
	}()
	select {
	case msg := <-cleanupChannel:
		dbg.Lvl3("Received msg from cleanupChannel", msg)
	case <-time.After(time.Second * 20):
		for i, m := range doneHosts {
			if !m {
				dbg.Lvl1("Missing host:", hostlist[i], "- You should run")
				dbg.Lvl1("/usr/testbed/bin/node_reboot", hostlist[i])
			}
		}
		dbg.Fatal("Didn't receive all replies while cleaning up - aborting.")
	}

	if kill {
		dbg.Lvl2("Only cleaning up - returning")
		return
	}

	// ADDITIONS: the monitoring part
	// Proxy will listen on Sink:SinkPort and redirect every packet to
	// RedirectionAddress:RedirectionPort. With the remote tunnel forwarding it
	// will be forwarded to the real sink.
	dbg.Lvl2("Launching proxy redirecting to", deterlab.ProxyRedirectionAddress, ":", deterlab.ProxyRedirectionPort)
	go monitor.Proxy(deterlab.ProxyRedirectionAddress + ":" + deterlab.ProxyRedirectionPort)
	time.Sleep(time.Second)

	hostnames := deterlab.Hostnames
	dbg.Lvl4("hostnames:", hostnames)

	// mapping from physical node name to the timestamp servers that are running there
	// essentially a reverse mapping of vpmap except ports are also used
	physToServer := make(map[string][]string)
	for _, virt := range hostnames {
		v, _, _ := net.SplitHostPort(virt)
		p := vpmap[v]
		physToServer[p] = append(physToServer[p], virt)
	}

	monitorAddr := deterlab.MonitorAddress + ":" + monitor.SinkPort
	servers := len(physToServer)
	ppm := len(deterlab.Hostnames) / servers
	dbg.Lvl1("starting", servers, "forkexecs with", ppm, "processes each =", servers*ppm)
	totalServers := 0
	for phys, virts := range physToServer {
		if len(virts) == 0 {
			continue
		}
		totalServers += len(virts)
		dbg.Lvl2("Launching forkexec for", len(virts), "clients on", phys)
		wg.Add(1)
		go func(phys string) {
			defer wg.Done()
			dbg.Lvl4("Starting servers on physical machine", phys, "with logger =", monitorAddr)
			err := cliutils.SshRunStdout("", phys, "cd remote; sudo ./forkexec"+
				" -physaddr="+phys+" -logger="+monitorAddr)
			if err != nil {
				dbg.Lvl1("Error starting timestamper:", err, phys)
			}
			dbg.Lvl4("Finished with Timestamper", phys)
		}(phys)
	}

	if deterlab.App == "stamp" || deterlab.App == "sign" {
		// Every stampserver that started up (mostly waiting for configuration-reading)
		// reports to the monitor - once everybody is there, the test goes on.
		start_config := time.Now()
		for {
			s, err := monitor.GetReady(monitorAddr)
			if err != nil {
				log.Fatal("Couldn't contact monitor")
			} else {
				dbg.Lvl1("Processes started:", s.Ready, "/", totalServers, "after", time.Since(start_config))
				if s.Ready == totalServers {
					dbg.Lvl2("Everybody ready, starting")
					// give everybody time to start up listening
					time.Sleep(time.Second * 2)
					break
				}
			}
			time.Sleep(time.Second)
		}
	}

	switch deterlab.App {
	case "stamp":
		dbg.Lvl1("starting", len(physToServer), "time clients")
		// start up one timeclient per physical machine
		// it requests timestamps from all the servers on that machine
		amroot := true
		for p, ss := range physToServer {
			if len(ss) == 0 {
				dbg.Lvl3("ss is empty - not starting")
				continue
			}
			servers := strings.Join(ss, ",")
			dbg.Lvl3("Starting with ss=", ss)
			go func(p string, a bool) {
				cmdstr := "cd remote; sudo ./" + deterlab.App + " -mode=client" +
					" -name=client@" + p +
					" -server=" + servers +
					" -amroot=" + strconv.FormatBool(a)
				dbg.Lvl3("Users will launch client:", cmdstr)
				err := cliutils.SshRunStdout("", p, cmdstr)
				if err != nil {
					dbg.Lvl4("Deter.go: error for", deterlab.App, err)
				}
				dbg.Lvl4("Deter.go: Finished with", deterlab.App, p)
			}(p, amroot)
			amroot = false
		}
	case "sign_no":
		// TODO: for now it's only a simple startup from the server
		dbg.Lvl1("Starting only one client")
	}

	// wait for the servers to finish before stopping
	wg.Wait()
}
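// A worked example of the physToServer grouping used above (illustrative
// values only): given
//
//   vpmap     = map[string]string{"10.0.0.1": "server-0"}
//   hostnames = []string{"10.0.0.1:2000", "10.0.0.1:2001"}
//
// the loop produces
//
//   physToServer = map[string][]string{"server-0": {"10.0.0.1:2000", "10.0.0.1:2001"}}
//
// so a single forkexec per physical machine starts all of that machine's
// servers.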