Example #1
0
// ScoreConfigure reads the configuration for the score si from r and hands
// the result to si.updateFromConfig.
//
// Any error returned while reading is passed to o.MightFail along with the
// score's name.
func ScoreConfigure(si *ScoreInfo, r io.Reader) {
	o.Info("Score: %s (%s)", si.Name, si.Executable)

	cfg := NewScoreInfoConfig()
	readErr := cfg.Read(r, 1)
	o.MightFail(readErr, "Error Parsing Score Configuration for %s", si.Name)

	si.updateFromConfig(cfg)
}
Example #2
0
// regInternalAdd stores a freshly created client record for hostname in
// clientList, replacing whatever was registered under that name before.
func regInternalAdd(hostname string) {
	o.Info("Adding host: %s", hostname)

	entry := NewClientInfo()
	clientList[hostname] = entry

	// Initialise the record eagerly, right here; doing so helps unmask
	// sequencing errors.
	entry.pendingTasks = make(map[uint64]*TaskRequest)
	entry.Player = hostname
}
Example #3
0
// handleRequest handles a task request message that arrived on c.
//
// message is expected to be an *o.TaskRequest; any other type is reported
// through o.Assert.  A task that is already in the registry has its response
// resent if (and only if) that response is finished.  An unknown task is
// given a pending response, added to the registry and queued for execution.
func handleRequest(c net.Conn, message interface{}) {
	o.Debug("Request Recieved.  Decoding!")
	req, isRequest := message.(*o.TaskRequest)
	if !isRequest {
		o.Assert("CC stuffed up - handleRequest got something that wasn't a TaskRequest.")
	}
	task := TaskFromProto(req)

	// Look the task up in the registry first.
	o.Debug("Request for Job.ID %d", task.Id)
	if known := TaskGet(task.Id); known != nil {
		// Already registered: nothing to do unless there is a finished
		// response to replay.
		if known.MyResponse.IsFinished() {
			o.Debug("job%d: Resending Response", task.Id)
			sendResponse(c, known.MyResponse)
		}
		return
	}

	// Not seen before: record it with a pending response.
	task.MyResponse = NewTaskResponse()
	task.MyResponse.id = task.Id
	task.MyResponse.State = RESP_PENDING
	TaskAdd(task)
	o.Info("Added New Task (Job ID %d) to our local registry", task.Id)

	// Queue it on the pending list so it gets actioned.
	appendPendingTask(task)
}
Example #4
0
// signalHandler waits for SIGHUP, SIGINT and SIGTERM and acts on each one as
// it arrives.  It never returns, so run it on its own goroutine.
//
// SIGHUP calls ConfigLoad.  SIGINT and SIGTERM call SaveState and then exit
// the process with status 0.
func signalHandler() {
	// signal.Notify does not block when delivering: with an unbuffered
	// channel any signal that arrives while we are busy below (for example
	// inside ConfigLoad) is silently dropped.  Keep one slot of buffer so a
	// signal is not lost while another is being handled.
	c := make(chan os.Signal, 1)
	signal.Notify(c, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)

	for sig := range c {
		ux, ok := sig.(syscall.Signal)
		if !ok {
			o.Warn("Couldn't handle signal %s, coercion failed", sig)
			continue
		}

		switch ux {
		case syscall.SIGHUP:
			o.Info("Reloading configuration...")
			ConfigLoad()
		case syscall.SIGINT, syscall.SIGTERM:
			//FIXME: Gentle Shutdown
			SaveState()
			os.Exit(0)
		}
	}
}
Example #5
0
// handleRequest handles a job request message that arrived on c.
//
// message is expected to be an *o.ProtoTaskRequest; any other type is
// reported through o.Assert.  If the job is already in the registry its
// response is resent, but only when that response is finished.  Otherwise
// the job receives a pending response, is added to the registry and is
// placed on the pending job list.
func handleRequest(c net.Conn, message interface{}) {
	o.Debug("Request Recieved.  Decoding!")
	req, isRequest := message.(*o.ProtoTaskRequest)
	if !isRequest {
		o.Assert("CC stuffed up - handleRequest got something that wasn't a ProtoTaskRequest.")
	}
	job := o.JobFromProto(req)

	// Search the registry for the job.
	o.Debug("Request for Job.ID %d", job.Id)
	if known := o.JobGet(job.Id); known != nil {
		// Known job: replay the response if there is a finished one.
		if known.MyResponse.IsFinished() {
			o.Debug("job%d: Resending Response", job.Id)
			sendResponse(c, known.MyResponse)
		}
		return
	}

	// New job: register it with a pending response.
	job.MyResponse = o.NewTaskResponse()
	job.MyResponse.Id = job.Id
	job.MyResponse.State = o.RESP_PENDING
	o.JobAdd(job)
	o.Info("Added New Job %d to our local registry", job.Id)

	// Push it onto the pending job list so we know it needs actioning.
	appendPendingJob(job)
}
Example #6
0
// connectMe keeps trying to open a verified TLS connection to the master and
// returns once a connection has been handed over on newConnection.
//
// initialDelay is the delay applied before the first attempt; the log line
// prints it as backOff/1e9 seconds.  While the delay is positive it is slept
// and then multiplied by ReconnectDelayScale, capped at
// MaximumReconnectDelay.  A non-positive delay skips the sleep and seeds the
// back-off with InitialReconnectDelay for the following attempt.  The
// back-off value current at the time of success is reported in the
// NewConnectionInfo's timeout field.
func connectMe(initialDelay int64) {
	var backOff int64 = initialDelay
	for {
		// Sleep first.
		if backOff > 0 {
			o.Info("Sleeping for %d seconds", backOff/1e9)
			// NOTE(review): time.Sleep returning an error and accepting an
			// int64 is the pre-Go 1 signature - confirm the toolchain
			// before modernising this call.
			err := time.Sleep(backOff)
			o.MightFail(err, "Couldn't Sleep")
			backOff *= ReconnectDelayScale
			if backOff > MaximumReconnectDelay {
				backOff = MaximumReconnectDelay
			}
		} else {
			backOff = InitialReconnectDelay
		}

		tconf := &tls.Config{
			RootCAs: CACertPool,
		}
		tconf.Certificates = append(tconf.Certificates, CertPair)

		// update our local hostname.
		LocalHostname = GetStringOpt("player name")
		if LocalHostname == "" {
			LocalHostname = o.ProbeHostname()
			o.Warn("No hostname provided - probed hostname: %s", LocalHostname)
		}

		masterHostname := GetStringOpt("master")

		raddr := fmt.Sprintf("%s:%d", masterHostname, 2258)
		o.Info("Connecting to %s", raddr)
		conn, err := tls.Dial("tcp", raddr, tconf)
		if err == nil {
			// A failed handshake must count as a failed attempt rather than
			// being discarded before the hostname check.
			if err = conn.Handshake(); err == nil {
				err = conn.VerifyHostname(masterHostname)
			}
			if err != nil {
				// Don't leak the socket of a connection we are rejecting.
				conn.Close()
			}
		}
		if err == nil {
			nc := new(NewConnectionInfo)
			nc.conn = conn
			nc.timeout = backOff
			newConnection <- nc
			return
		}
		o.Warn("Couldn't connect to master: %s", err)
	}
}
Example #7
0
// batchLogger forwards everything read from errpipe to the log, tagged with
// jobid, until the pipe reports EOF or a read error.  errpipe is closed when
// the function returns.
//
// The isPrefix result of ReadLine is ignored, so a line longer than the
// reader's buffer is logged as several entries.
func batchLogger(jobid uint64, errpipe *os.File) {
	defer errpipe.Close()

	lines := bufio.NewReader(errpipe)
	for {
		line, _, readErr := lines.ReadLine()
		switch {
		case readErr == io.EOF:
			// Writer side closed: normal end of stream.
			return
		case readErr != nil:
			o.Warn("executionLogger failed: %s", readErr)
			return
		}
		o.Info("job%d: STDERR: %s", jobid, string(line))
	}
}
Example #8
0
// handleIdentify processes the identification message a client sends to
// introduce itself.
//
// The client is aborted when it has already identified itself, when the
// message is not a usable *o.IdentifyClient, when the claimed hostname is
// not authorised, when (on a TLS connection, unless *DontVerifyPeer is set)
// the peer certificate is missing or does not match the claimed hostname, or
// when no registry entry exists for the hostname.  On success the
// registry entry is merged into the client via client.MergeState.
func handleIdentify(client *ClientInfo, message interface{}) {
	if client.Player != "" {
		o.Warn("Client %s: tried to reintroduce itself", client.Name())
		client.Abort()
		return
	}

	// The payload comes off the wire: a missing or mistyped message, or one
	// without a hostname, must not be dereferenced.
	ic, ok := message.(*o.IdentifyClient)
	if !ok || ic == nil || ic.Hostname == nil {
		o.Warn("Client %s: sent a malformed identify message", client.Name())
		client.Abort()
		return
	}
	hostname := *ic.Hostname

	o.Info("Client %s: identified itself as \"%s\"", client.Name(), hostname)
	client.Player = hostname
	if !HostAuthorised(client.Player) {
		o.Warn("Client %s: not authorised", client.Name())
		client.Abort()
		return
	}

	/* if we're TLS, verify the client's certificate given the name it used */
	if tlsc, isTLS := client.connection.(*tls.Conn); isTLS && !*DontVerifyPeer {
		cs := tlsc.ConnectionState()
		// Check the length rather than nil-ness: indexing an empty but
		// non-nil slice would panic.
		if len(cs.PeerCertificates) == 0 || cs.PeerCertificates[0] == nil {
			o.Warn("Client %s: peer didn't provide a certificate", client.Name())
			client.Abort()
			return
		}
		if err := cs.PeerCertificates[0].VerifyHostname(client.Player); err != nil {
			o.Warn("Client %s: couldn't be identified: %s", client.Name(), err)
			client.Abort()
			return
		}
	}

	reg := ClientGet(client.Player)
	if nil == reg {
		o.Warn("Client %s: couldn't register", client.Name())
		client.Abort()
		return
	}
	client.MergeState(reg)
}
Example #9
0
// signalHandler waits for SIGHUP, SIGINT and SIGTERM and acts on each one as
// it arrives.  It never returns, so run it on its own goroutine.
//
// SIGHUP posts a value on reloadScores.  SIGINT and SIGTERM exit the process
// with status 0.
func signalHandler() {
	// signal.Notify does not block when delivering: with an unbuffered
	// channel any signal that arrives while we are busy below (for example
	// blocked on the reloadScores send) is silently dropped.  Keep one slot
	// of buffer so a signal is not lost while another is being handled.
	c := make(chan os.Signal, 1)
	signal.Notify(c, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)

	for sig := range c {
		ux, ok := sig.(syscall.Signal)
		if !ok {
			o.Warn("Couldn't handle signal %s, coercion failed", sig)
			continue
		}

		switch ux {
		case syscall.SIGHUP:
			o.Info("Reloading configuration...")
			reloadScores <- 1
		case syscall.SIGINT, syscall.SIGTERM:
			os.Exit(0)
		}
	}
}
Example #10
0
// regInternalDel logs the removal of hostname and drops its entry from
// clientList.  A hostname that is not registered is still logged, but the
// delete itself does nothing.
func regInternalDel(hostname string) {
	o.Info("Removing host: %s", hostname)

	// Forget the registry entry.
	delete(clientList, hostname)
}
Example #11
0
// ProcessingLoop is the central event loop.  It starts a connection attempt
// with connectMe and then loops forever: resending unacknowledged responses,
// starting the next pending task when none is executing, and reacting to
// task completion, new and lost connections, messages from the master,
// score reload requests and the keepalive timer.
//
// It never returns.
func ProcessingLoop() {
	// conn is the current connection to the master, or nil while disconnected.
	var conn net.Conn = nil
	// nextRetryResp is the unacknowledged response currently held for resending.
	var nextRetryResp *TaskResponse = nil
	// taskCompletionChan is non-nil only while a task started by ExecuteTask
	// has not yet reported its response.
	var taskCompletionChan <-chan *TaskResponse = nil
	// connectDelay is passed to connectMe; it is updated from each new
	// connection's timeout and reset to 0 when a message is dispatched.
	var connectDelay time.Duration
	// doScoreReload records a reload request that arrived mid-execution.
	var doScoreReload bool = false
	// kick off a new connection attempt.
	go connectMe(connectDelay)

	// and this is where we spin!
	for {
		var retryDelay time.Duration = 0
		// retryChan stays nil unless a retry is armed below; a receive from
		// a nil channel never becomes ready in the select.
		var retryChan <-chan time.Time = nil

		if conn != nil {
			// Send every unacknowledged response whose retry time has
			// already passed, and hold on to the first one that is not yet due.
			for nextRetryResp == nil {
				nextRetryResp = getNextUnacknowledgedResponse()
				if nil == nextRetryResp {
					break
				}
				retryDelay = nextRetryResp.RetryTime.Sub(time.Now())
				if retryDelay < 0 {
					sendResponse(conn, nextRetryResp)
					nextRetryResp = nil
				}
			}
			// NOTE(review): when nextRetryResp was carried over from an
			// earlier iteration the loop above is skipped and retryDelay is
			// still 0, so this timer fires immediately - confirm that is
			// intended.
			if nextRetryResp != nil {
				retryChan = time.After(retryDelay)
			}
		}
		// Nothing executing: start the next pending task, or, if there is
		// none and we are connected, tell the master we are ready (once).
		if taskCompletionChan == nil {
			nextTask := getNextPendingTask()
			if nextTask != nil {
				taskCompletionChan = ExecuteTask(nextTask)
			} else {
				if conn != nil && !pendingTaskRequest {
					o.Debug("Asking for trouble")
					p := o.MakeReadyForTask()
					p.Send(conn)
					o.Debug("Sent Request for trouble")
					pendingTaskRequest = true
				}
			}
		}
		select {
		// Currently executing job finishes.
		case newresp := <-taskCompletionChan:
			o.Debug("job%d: Completed with State %s\n", newresp.id, newresp.State)
			// preemptively set a retrytime.
			newresp.RetryTime = time.Now()
			// ENOCONN - sub it in as our next retryresponse, and prepend the old one onto the queue.
			if nil == conn {
				if nil != nextRetryResp {
					prequeueResponse(nextRetryResp)
				}
				o.Debug("job%d: Queuing Initial Response", newresp.id)
				nextRetryResp = newresp
			} else {
				o.Debug("job%d: Sending Initial Response", newresp.id)
				sendResponse(conn, newresp)
			}
			// A reload requested while the task was running happens now.
			if doScoreReload {
				o.Info("Performing Deferred score reload")
				LoadScores()
				doScoreReload = false
			}
			taskCompletionChan = nil
		// If the current unacknowledged response needs a retry, send it.
		case <-retryChan:
			sendResponse(conn, nextRetryResp)
			nextRetryResp = nil
		// New connection.  Set up the receiver thread and Introduce ourselves.
		case nci := <-newConnection:
			// Replace any connection we still hold.
			if conn != nil {
				conn.Close()
			}
			conn = nci.conn
			connectDelay = nci.timeout
			pendingTaskRequest = false

			// start the reader
			go Reader(conn)

			/* Introduce ourself */
			p := o.MakeIdentifyClient(LocalHostname, PlayerVersion)
			p.Send(conn)
		// Lost connection.  Shut down the connection.
		case <-lostConnection:
			o.Warn("Lost Connection to Master")
			// NOTE(review): conn is not checked for nil here, unlike in the
			// newConnection case - verify lostConnection cannot fire while
			// disconnected.
			conn.Close()
			conn = nil
			// restart the connection attempts
			go connectMe(connectDelay)
		// Message received from master.  Decode and action.
		case p := <-receivedMessage:
			// because the message could possibly be an ACK, push the next retry response back into the queue so acknowledge can find it.
			if nil != nextRetryResp {
				prequeueResponse(nextRetryResp)
				nextRetryResp = nil
			}
			// Packets without a payload are dispatched with a nil message.
			var upkt interface{} = nil
			if p.Length > 0 {
				var err error
				upkt, err = p.Decode()
				o.MightFail(err, "Couldn't decode packet from master")
			}
			handler, exists := dispatcher[p.Type]
			if exists {
				// A packet we can handle resets the reconnect delay.
				connectDelay = 0
				handler(conn, upkt)
			} else {
				o.Fail("Unhandled Pkt Type %d", p.Type)
			}
		// Reload scores
		case <-reloadScores:
			// fortunately this is actually completely safe as
			// long as nobody's currently executing.
			// who'd have thunk it?
			if taskCompletionChan == nil {
				o.Info("Reloading scores")
				LoadScores()
			} else {
				o.Info("Deferring score reload (execution in progress)")
				doScoreReload = true
			}
		// Keepalive delay expired.  Send Nop.
		case <-time.After(KeepaliveDelay):
			// Nothing to keep alive while disconnected.
			if conn == nil {
				break
			}
			o.Debug("Sending NOP")
			p := o.MakeNop()
			p.Send(conn)
		}
	}
}
Example #12
0
// handleResult processes a task response reported by client.
//
// Only finished responses are acted on; anything else is just the client
// letting us know the job is still alive.  A finished response for an
// unknown job is NAKed.  A finished response for a known job is ACKed and,
// if the client actually had that task pending, the result is stored, a
// retryable ONEOF failure is redispatched when players remain, the job state
// is reviewed, and the task is removed from the client's pending list.
func handleResult(client *ClientInfo, message interface{}) {
	// The payload comes off the wire: don't build a response from a missing
	// or mistyped message.
	jr, ok := message.(*o.ProtoTaskResponse)
	if !ok || jr == nil {
		o.Warn("Client %s: sent a malformed task response", client.Name())
		return
	}
	r := ResponseFromProto(jr)

	// at this point in time, we only care about terminal
	// condition codes.  a Job that isn't finished is just
	// prodding us back to let us know it lives.
	if !r.IsFinished() {
		return
	}

	job := JobGet(r.id)
	if nil == job {
		o.Warn("Client %s: NAKing job%d, couldn't find job data", client.Name(), r.id)
		nack := o.MakeNack(r.id)
		client.sendNow(nack)
		return
	}

	/* the job exists, Ack it. */
	ack := o.MakeAck(r.id)
	client.sendNow(ack)

	// now, we only accept the results if we were
	// expecting the results (ie: it was pending)
	// and expunge the task information from the
	// pending list so we stop bugging the client for it.
	task, exists := client.pendingTasks[r.id]
	if !exists {
		return
	}

	// store the result.
	if !JobAddResult(client.Player, r) {
		o.Assert("Couldn't add result for pending task")
	}

	// next, work out if the job is a retryable failure or not
	didretry := false
	if r.DidFail() {
		o.Info("Client %s: reported failure for job%d", client.Name(), r.id)
		if r.CanRetry() {
			// Look the job up again now that the result has been stored,
			// and don't dereference it if it has gone away.
			current := JobGet(r.id)
			if current != nil && current.Scope == SCOPE_ONEOF {
				// right, we're finally deep enough to work out what's going on!
				JobDisqualifyPlayer(r.id, client.Player)
				if len(current.Players) >= 1 {
					// still players left we can try?  then go for it!
					CleanTask(task)
					DispatchTask(task)
					didretry = true
				}
			}
		}
	}
	if !didretry {
		// if we didn't retry, the task needs to be marked as finished.
		task.State = TASK_FINISHED
	}

	// update the job state.
	JobReviewState(r.id)

	delete(client.pendingTasks, r.id)
}
Example #13
0
// ServiceRequests builds the TLS configuration from the program options,
// binds the listener and then accepts connections forever, passing each one
// to HandleConnection.
//
// Options read: "bind address", "x509 certificate", "x509 private key" and
// "server name", plus the CA certificate list from GetCACertList.  Client
// certificates are required and verified unless *DontVerifyPeer is set, in
// which case they are only requested.
func ServiceRequests() {
	var sockConfig tls.Config

	// resolve the bind address
	bindAddressStr := GetStringOpt("bind address")
	var bindAddr *net.IPAddr
	if bindAddressStr != "" {
		var err error
		bindAddr, err = net.ResolveIPAddr("ip", bindAddressStr)
		if err != nil {
			o.Warn("Ignoring bind address.  Couldn't resolve \"%s\": %s", bindAddressStr, err)
			// Only a failed resolution falls back to the wildcard address;
			// a successfully resolved address must be kept.
			bindAddr = nil
		}
	}

	// load the x509 certificate and key, then attach it to the tls config.
	x509CertFilename := GetStringOpt("x509 certificate")
	x509PrivateKeyFilename := GetStringOpt("x509 private key")
	serverCert, err := tls.LoadX509KeyPair(x509CertFilename, x509PrivateKeyFilename)
	o.MightFail(err, "Couldn't load certificates")
	sockConfig.Certificates = append(sockConfig.Certificates, serverCert)

	// load the CA certs
	CACertPool = x509.NewCertPool()
	for _, filename := range GetCACertList() {
		appendCACertFile(CACertPool, filename)
	}
	sockConfig.ClientCAs = CACertPool

	// determine the server hostname.
	servername := GetStringOpt("server name")
	switch {
	case servername != "":
		o.Info("Using %s as the server name", servername)
		sockConfig.ServerName = servername
	case bindAddr != nil:
		o.Warn("Probing for FQDN for bind address as none was provided")
		hostnames, err := net.LookupAddr(bindAddr.String())
		o.MightFail(err, "Failed to get full hostname for bind address")
		if len(hostnames) > 0 {
			sockConfig.ServerName = hostnames[0]
		} else {
			// No reverse mapping came back; fall back to probing.
			o.Warn("Probing for FQDN as no server name was provided")
			sockConfig.ServerName = o.ProbeHostname()
		}
	default:
		o.Warn("Probing for FQDN as no server name was provided")
		sockConfig.ServerName = o.ProbeHostname()
	}

	// ask the client to authenticate
	sockConfig.ClientAuth = tls.RequireAndVerifyClientCert
	if *DontVerifyPeer {
		sockConfig.ClientAuth = tls.RequestClientCert
	}

	/* convert the bindAddress to a string suitable for the Listen call */
	// JoinHostPort brackets IPv6 literals; an empty host yields ":port".
	bindHost := ""
	if bindAddr != nil {
		bindHost = bindAddr.String()
	}
	laddr := net.JoinHostPort(bindHost, fmt.Sprintf("%d", o.DefaultMasterPort))
	o.Info("Binding to %s...", laddr)
	listener, err := tls.Listen("tcp", laddr, &sockConfig)
	o.MightFail(err, "Couldn't bind TLS listener")

	for {
		o.Info("Waiting for connection...")
		c, err := listener.Accept()
		o.MightFail(err, "Couldn't accept TLS connection")
		o.Info("Connection received from %s", c.RemoteAddr().String())
		HandleConnection(c)
	}
}

// appendCACertFile reads the PEM data in filename and appends its
// certificates to pool.  A file that cannot be opened or fully read is
// skipped with a warning.  The file is closed before returning, so handles do
// not pile up while the caller's accept loop runs.
func appendCACertFile(pool *x509.CertPool, filename string) {
	fh, err := os.Open(filename)
	if err != nil {
		o.Warn("Whilst parsing CA certs, couldn't open %s: %s", filename, err)
		return
	}
	defer fh.Close()

	fi, err := fh.Stat()
	o.MightFail(err, "Couldn't stat CA certificate file: %s", filename)

	// A single Read may return fewer bytes than requested; keep reading
	// until the buffer is full.
	data := make([]byte, fi.Size())
	filled := 0
	for filled < len(data) {
		n, err := fh.Read(data[filled:])
		filled += n
		if err != nil && filled < len(data) {
			o.Warn("Whilst parsing CA certs, couldn't read %s: %s", filename, err)
			return
		}
	}

	if !pool.AppendCertsFromPEM(data) {
		o.Warn("Whilst parsing CA certs, found no usable certificates in %s", filename)
	}
}
Example #14
0
// doExecution runs the score requested by task as a child process and
// records the outcome in task.MyResponse.State.
//
// Whatever path the function takes, task.MyResponse is sent on
// completionChannel when it returns.  The child's stdin and stdout default
// to the null device and its stderr is piped to batchLogger; the score
// interface's environment may override stdin/stdout and add further files.
func doExecution(task *TaskRequest, completionChannel chan<- *TaskResponse) {
	// we must notify the parent when we exit.
	// Deferred first, so it runs after every other deferred call below.
	defer func(c chan<- *TaskResponse, task *TaskRequest) { c <- task.MyResponse }(completionChannel, task)

	// first of all, verify that the score exists at all.
	score, exists := Scores[task.Score]
	if !exists {
		o.Warn("job%d: request for unknown score: %s", task.Id, task.Score)
		task.MyResponse.State = RESP_FAILED_UNKNOWN_SCORE
		return
	}
	si := NewScoreInterface(task)
	if si == nil {
		o.Warn("job%d: couldn't initialise score interface", task.Id)
		task.MyResponse.State = RESP_FAILED_HOST_ERROR
		return
	}
	if !si.Prepare() {
		o.Warn("job%d: couldn't prepare score interface", task.Id)
		task.MyResponse.State = RESP_FAILED_HOST_ERROR
		return
	}
	// Cleanup is only registered once Prepare has succeeded.
	defer si.Cleanup()

	eenv := si.SetupProcess()
	task.MyResponse.State = RESP_RUNNING

	procenv := new(os.ProcAttr)
	// Build the default environment.
	procenv.Env = peSetEnv(procenv.Env, "PATH", "/usr/bin:/usr/sbin:/bin:/sbin")
	procenv.Env = peSetEnv(procenv.Env, "IFS", " \t\n")
	pwd, err := os.Getwd()
	if err != nil {
		task.MyResponse.State = RESP_FAILED_HOST_ERROR
		o.Warn("job%d: couldn't resolve PWD: %s", task.Id, err)
		return
	}
	procenv.Env = peSetEnv(procenv.Env, "PWD", pwd)
	// copy in the environment overrides
	for k, v := range eenv.Environment {
		procenv.Env = peSetEnv(procenv.Env, k, v)
	}

	// attach FDs to procenv.
	procenv.Files = make([]*os.File, 3)

	// first off, attach /dev/null to stdin and stdout
	devNull, err := os.OpenFile(os.DevNull, os.O_RDWR|os.O_APPEND, 0666)
	o.MightFail(err, "couldn't open DevNull")
	defer devNull.Close()
	for i := 0; i < 2; i++ {
		procenv.Files[i] = devNull
	}
	// attach STDERR to our logger via pipe.
	lr, lw, err := os.Pipe()
	o.MightFail(err, "Couldn't create pipe")
	// Our copy of the write end is closed when this function returns.
	defer lw.Close()
	// lr will be closed by the logger.
	procenv.Files[2] = lw
	// check the environment's configuration and allow it to override stdin, stdout, and FDs 3+
	if nil != eenv.Files {
		for i := range eenv.Files {
			if i < 2 {
				procenv.Files[i] = eenv.Files[i]
			} else {
				// Entries from index 2 onwards are appended after the three
				// standard descriptors, so eenv.Files[2] lands in slot 3.
				procenv.Files = append(procenv.Files, eenv.Files[i])
			}
		}
	}
	var args []string
	args = append(args, eenv.Arguments...)

	o.Info("job%d: executing %s...", task.Id, score.Executable)
	// Start draining stderr before the process exists so its output is read
	// as soon as it is written.
	go batchLogger(task.Id, lr)
	proc, err := os.StartProcess(score.Executable, args, procenv)
	if err != nil {
		o.Warn("job%d: failed to start process", task.Id)
		task.MyResponse.State = RESP_FAILED_HOST_ERROR
		return
	}
	wm, err := proc.Wait()
	if err != nil {
		o.Warn("job%d: error waiting for process", task.Id)
		task.MyResponse.State = RESP_FAILED_UNKNOWN
		// Worst of all, we don't even know if we succeeded.
		return
	}
	// NOTE(review): the type assertion result is discarded; if Sys() ever
	// returned another type, ws would be the zero WaitStatus - confirm this
	// only runs on platforms where Sys() yields syscall.WaitStatus.
	ws, _ := wm.Sys().(syscall.WaitStatus)
	if !(ws.Signaled() || ws.Exited()) {
		o.Assert("Non Terminal notification received when not expected.")
		return
	}
	if ws.Signaled() {
		o.Warn("job%d: process got signalled", task.Id)
		task.MyResponse.State = RESP_FAILED_UNKNOWN
		return
	}
	if ws.Exited() {
		// Exit status 0 is success; anything else is a plain failure.
		if 0 == ws.ExitStatus() {
			o.Warn("job%d: process exited OK", task.Id)
			task.MyResponse.State = RESP_FINISHED
		} else {
			o.Warn("job%d: process exited with failure", task.Id)
			task.MyResponse.State = RESP_FAILED
		}
		return
	}
	o.Assert("Should never get here.")
}