func main() { /* create a new DRMAA1 session */ s, _ := drmaa.MakeSession() defer s.Exit() /* submit the sleep 3600 command to the cluster by using DRMAA */ jt, _ := s.AllocateJobTemplate() jt.SetRemoteCommand("sleep") jt.SetArg("3600") jobId, _ := s.RunJob(&jt) d, _ := time.ParseDuration("500ms") /* wait activly until job is running (use blocking call in real apps) */ ps, _ := s.JobPs(jobId) for ps != drmaa.PsRunning { fmt.Println("status is: ", ps) time.Sleep(d) ps, _ = s.JobPs(jobId) } /* get detailed job status (Grid Engine specific) */ jobStatus, err := gestatus.GetJobStatus(&s, jobId) if err != nil { fmt.Println(err) return } fmt.Printf("Job Name: %s\n", jobStatus.JobName()) fmt.Printf("Job Number: %d\n", jobStatus.JobId()) fmt.Printf("Job Script: %s\n", jobStatus.JobScript()) fmt.Printf("Job Args: %s\n", jobStatus.JobArgs()) fmt.Printf("Job Owner: %s\n", jobStatus.JobOwner()) fmt.Printf("Job Group: %s\n", jobStatus.JobGroup()) fmt.Printf("Job UID: %d\n", jobStatus.JobUID()) fmt.Printf("Job GID: %d\n", jobStatus.JobGID()) fmt.Printf("Job accounting string: %s\n", jobStatus.JobAccountName()) fmt.Printf("Job is now: %t\n", jobStatus.IsImmediateJob()) fmt.Printf("Job is binary: %t\n", jobStatus.IsBinaryJob()) fmt.Printf("Job has reservation: %t\n", jobStatus.HasReservation()) fmt.Printf("Job is array job: %t\n", jobStatus.IsArrayJob()) fmt.Printf("Job merges stderr %t\n", jobStatus.JobMergesStderr()) fmt.Printf("Job has 'no shell' requested: %t\n", jobStatus.HasNoShell()) fmt.Printf("Job has memory binding: %t\n", jobStatus.HasMemoryBinding()) fmt.Printf("Job memory binding: %s\n", jobStatus.MemoryBinding()) fmt.Printf("Job submission time: %s\n", jobStatus.SubmissionTime()) fmt.Printf("Job start time: %s\n", jobStatus.StartTime()) fmt.Printf("Job deadline: %s\n", jobStatus.JobDeadline()) fmt.Printf("Job mail options: %s\n", jobStatus.MailOptions()) fmt.Printf("Job AR: %d\n", jobStatus.AdvanceReservationID()) fmt.Printf("Job POSIX priority: %d\n", jobStatus.PosixPriority()) fmt.Printf("Job Class Name: %s\n", jobStatus.JobClassName()) fmt.Printf("Job Mailing Adresses: %s\n", jobStatus.MailAdresses()) fmt.Printf("Job Destination Queue Instance List: %s\n", jobStatus.DestinationQueueInstanceList()) fmt.Printf("Job Destination Host List: %s\n", jobStatus.DestinationHostList()) fmt.Printf("Job Tasks: %d\n", jobStatus.TasksCount()) }
func main() { flag.BoolVar(showHelp, "h", false, "Show help message") flag.Usage = func() { fmt.Printf(helpMessage, os.Args[0], os.Args[0]) } flag.Parse() if flag.NArg() >= 1 && strings.ToLower(flag.Args()[0]) == "help" { *showHelp = true } if flag.NArg() < 2 { *showHelp = true } if *runDebug { dvid.Mode = dvid.Debug } if *showHelp { flag.Usage() os.Exit(0) } // Get the command numNodes, err := strconv.Atoi(flag.Args()[0]) if err != nil { fmt.Fprintf(os.Stderr, "Illegal # of nodes specified '%s': %s\n", flag.Args()[0], err) os.Exit(1) } if numNodes < 1 { fmt.Fprintln(os.Stderr, "Need at least one node specified in arguments!") flag.Usage() os.Exit(0) } command := flag.Args()[1:] // Create a new DRMAA1 session. s, _ := drmaa.MakeSession() defer s.Exit() // Submit the node processes to the cluster. jobIds := make([]string, numNodes) hostCh := make(chan string, numNodes) jt, _ := s.AllocateJobTemplate() jt.SetRemoteCommand("/groups/flyem/proj/builds/cluster2014/bin/dvid-node") jt.SetEmail([]string{*email}) options := fmt.Sprintf("-pe batch %d", 16) jt.SetNativeSpecification(options) for n := 0; n < numNodes; n++ { jt.SetArgs(command) jobIds[n], _ = s.RunJob(&jt) go waitForRunning(s, jobIds[n], hostCh) } // Gather all the running hostnames hostnames := []string{} for n := 0; n < numNodes; n++ { hostname := <-hostCh if hostname == "" { fmt.Println("Could not start node!") } else { fmt.Printf("Started node on %s (%d/%d)\n", hostname, n+1, numNodes) hostnames = append(hostnames, hostname) } } // Wait until servers have spun up, preventing race condition on sending peers. time.Sleep(10 * time.Second) // Relay set of hostnames to all nodes. arg := node.Peers{hostnames} for _, hostname := range hostnames { address := fmt.Sprintf("%s%s", hostname, node.RPCAddress) client, err := rpc.DialHTTP("tcp", address) if err != nil { fmt.Fprintf(os.Stderr, "Did not find node at %s: %s\n", address, err.Error()) } else { var reply int err = client.Call("RPCConnection.SetPeers", &arg, &reply) if err != nil { fmt.Fprintf(os.Stderr, "RPC error to %s: %s\n", address, err.Error()) } else { fmt.Printf("Successfully sent %d peers to %s\n", len(hostnames), address) } } } }