Example #1
package main

import (
	"fmt"
	"time"

	"github.com/dgruber/drmaa"
	"github.com/dgruber/drmaa/gestatus"
)

func main() {

	/* create a new DRMAA1 session (errors are ignored for brevity) */
	s, _ := drmaa.MakeSession()
	defer s.Exit()

	/* submit the "sleep 3600" command to the cluster using DRMAA */
	jt, _ := s.AllocateJobTemplate()

	jt.SetRemoteCommand("sleep")
	jt.SetArg("3600")

	jobId, _ := s.RunJob(&jt)

	d := 500 * time.Millisecond

	/* poll actively until the job is running (use a blocking call in real apps) */
	ps, _ := s.JobPs(jobId)

	for ps != drmaa.PsRunning {
		fmt.Println("status is: ", ps)
		time.Sleep(d)
		ps, _ = s.JobPs(jobId)
	}

	/* get detailed job status (Grid Engine specific) */
	jobStatus, err := gestatus.GetJobStatus(&s, jobId)

	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Printf("Job Name: %s\n", jobStatus.JobName())
	fmt.Printf("Job Number: %d\n", jobStatus.JobId())
	fmt.Printf("Job Script: %s\n", jobStatus.JobScript())
	fmt.Printf("Job Args: %s\n", jobStatus.JobArgs())
	fmt.Printf("Job Owner: %s\n", jobStatus.JobOwner())
	fmt.Printf("Job Group: %s\n", jobStatus.JobGroup())
	fmt.Printf("Job UID: %d\n", jobStatus.JobUID())
	fmt.Printf("Job GID: %d\n", jobStatus.JobGID())
	fmt.Printf("Job accounting string: %s\n", jobStatus.JobAccountName())
	fmt.Printf("Job is now: %t\n", jobStatus.IsImmediateJob())
	fmt.Printf("Job is binary: %t\n", jobStatus.IsBinaryJob())
	fmt.Printf("Job has reservation: %t\n", jobStatus.HasReservation())
	fmt.Printf("Job is array job: %t\n", jobStatus.IsArrayJob())
	fmt.Printf("Job merges stderr %t\n", jobStatus.JobMergesStderr())
	fmt.Printf("Job has 'no shell' requested: %t\n", jobStatus.HasNoShell())
	fmt.Printf("Job has memory binding: %t\n", jobStatus.HasMemoryBinding())
	fmt.Printf("Job memory binding: %s\n", jobStatus.MemoryBinding())
	fmt.Printf("Job submission time: %s\n", jobStatus.SubmissionTime())
	fmt.Printf("Job start time: %s\n", jobStatus.StartTime())
	fmt.Printf("Job deadline: %s\n", jobStatus.JobDeadline())
	fmt.Printf("Job mail options: %s\n", jobStatus.MailOptions())
	fmt.Printf("Job AR: %d\n", jobStatus.AdvanceReservationID())
	fmt.Printf("Job POSIX priority: %d\n", jobStatus.PosixPriority())
	fmt.Printf("Job Class Name: %s\n", jobStatus.JobClassName())
	fmt.Printf("Job Mailing Adresses: %s\n", jobStatus.MailAdresses())
	fmt.Printf("Job Destination Queue Instance List: %s\n", jobStatus.DestinationQueueInstanceList())
	fmt.Printf("Job Destination Host List: %s\n", jobStatus.DestinationHostList())
	fmt.Printf("Job Tasks: %d\n", jobStatus.TasksCount())
}
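The polling loop above is illustrative; as its comment notes, a real application would block instead of polling. DRMAA1's blocking call waits for the job to terminate rather than to start. A minimal sketch of that variant, assuming the Session.Wait method and TimeoutWaitForever constant of the Go DRMAA bindings:

	/* block until the job terminates instead of polling JobPs;
	   drmaa.TimeoutWaitForever waits indefinitely */
	jobInfo, err := s.Wait(jobId, drmaa.TimeoutWaitForever)
	if err != nil {
		fmt.Println("wait failed:", err)
		return
	}
	fmt.Println("job exit status:", jobInfo.ExitStatus())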
Example #2
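This example assumes package-level declarations that are not shown: the showHelp, runDebug, and email flag variables, the helpMessage string, the dvid and node packages, and a waitForRunning helper (a possible sketch of that helper follows the example).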
func main() {
	flag.BoolVar(showHelp, "h", false, "Show help message")
	flag.Usage = func() {
		fmt.Printf(helpMessage, os.Args[0], os.Args[0])
	}
	flag.Parse()

	if flag.NArg() >= 1 && strings.ToLower(flag.Args()[0]) == "help" {
		*showHelp = true
	}
	if flag.NArg() < 2 {
		*showHelp = true
	}

	if *runDebug {
		dvid.Mode = dvid.Debug
	}
	if *showHelp {
		flag.Usage()
		os.Exit(0)
	}

	// Parse the number of nodes and the command to run on each.
	numNodes, err := strconv.Atoi(flag.Args()[0])
	if err != nil {
		fmt.Fprintf(os.Stderr, "Illegal # of nodes specified '%s': %s\n", flag.Args()[0], err)
		os.Exit(1)
	}
	if numNodes < 1 {
		fmt.Fprintln(os.Stderr, "At least one node must be specified!")
		flag.Usage()
		os.Exit(1)
	}
	command := flag.Args()[1:]

	// Create a new DRMAA1 session.
	s, _ := drmaa.MakeSession()
	defer s.Exit()

	// Submit the node processes to the cluster.
	jobIds := make([]string, numNodes)
	hostCh := make(chan string, numNodes)

	jt, _ := s.AllocateJobTemplate()
	jt.SetRemoteCommand("/groups/flyem/proj/builds/cluster2014/bin/dvid-node")
	jt.SetEmail([]string{*email})

	// Request a 16-slot parallel environment for each node process.
	options := fmt.Sprintf("-pe batch %d", 16)
	jt.SetNativeSpecification(options)

	jt.SetArgs(command)
	for n := 0; n < numNodes; n++ {
		jobIds[n], _ = s.RunJob(&jt)
		go waitForRunning(s, jobIds[n], hostCh)
	}

	// Gather all the running hostnames
	hostnames := []string{}
	for n := 0; n < numNodes; n++ {
		hostname := <-hostCh
		if hostname == "" {
			fmt.Println("Could not start node!")
		} else {
			fmt.Printf("Started node on %s (%d/%d)\n", hostname, n+1, numNodes)
			hostnames = append(hostnames, hostname)
		}
	}

	// Wait until servers have spun up, preventing race condition on sending peers.
	time.Sleep(10 * time.Second)

	// Relay set of hostnames to all nodes.
	arg := node.Peers{hostnames}
	for _, hostname := range hostnames {
		address := fmt.Sprintf("%s%s", hostname, node.RPCAddress)
		client, err := rpc.DialHTTP("tcp", address)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Did not find node at %s: %s\n", address, err.Error())
		} else {
			var reply int
			err = client.Call("RPCConnection.SetPeers", &arg, &reply)
			if err != nil {
				fmt.Fprintf(os.Stderr, "RPC error to %s: %s\n", address, err.Error())
			} else {
				fmt.Printf("Successfully sent %d peers to %s\n", len(hostnames), address)
			}
		}
	}
}
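Example #2 calls a waitForRunning helper whose definition is not shown. Below is a hypothetical reconstruction that only honors the call site's contract: send the job's execution hostname on hostCh, or an empty string on failure. The polling logic and the use of gestatus to resolve the host are assumptions.

// Hypothetical sketch of the waitForRunning helper: poll the job state
// until the job runs, then report its execution host on hostCh.
func waitForRunning(s drmaa.Session, jobId string, hostCh chan string) {
	for {
		ps, err := s.JobPs(jobId)
		if err != nil || ps == drmaa.PsFailed {
			hostCh <- "" // the node could not be started
			return
		}
		if ps == drmaa.PsRunning {
			break
		}
		time.Sleep(500 * time.Millisecond)
	}
	// Assumption: DestinationHostList() (as used in Example #1) returns
	// the execution hosts of the running job as a []string slice.
	jobStatus, err := gestatus.GetJobStatus(&s, jobId)
	if err != nil {
		hostCh <- ""
		return
	}
	hosts := jobStatus.DestinationHostList()
	if len(hosts) == 0 {
		hostCh <- ""
		return
	}
	hostCh <- hosts[0]
}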