func (driver *MesosSchedulerDriver) statusUpdated(from *upid.UPID, pbMsg proto.Message) { msg := pbMsg.(*mesos.StatusUpdateMessage) if driver.Status() == mesos.Status_DRIVER_ABORTED { log.V(1).Infoln("Ignoring StatusUpdate message, the driver is aborted!") return } if !driver.connected { log.V(1).Infoln("Ignoring StatusUpdate message, the driver is not connected!") return } if !driver.MasterPid.Equal(from) { log.Warningf("ignoring status message because it was sent from '%v' instead of leading master '%v'", from, driver.MasterPid) return } log.V(2).Infoln("Received status update from ", from.String(), " status source:", msg.GetPid()) driver.Scheduler.StatusUpdate(driver, msg.Update.GetStatus()) if driver.Status() == mesos.Status_DRIVER_ABORTED { log.V(1).Infoln("Not sending StatusUpdate ACK, the driver is aborted!") return } // Send StatusUpdate Acknowledgement // Only send ACK if udpate was not from this driver if !from.Equal(driver.self) && msg.GetPid() != from.String() { ackMsg := &mesos.StatusUpdateAcknowledgementMessage{ SlaveId: msg.Update.SlaveId, FrameworkId: driver.FrameworkInfo.Id, TaskId: msg.Update.Status.TaskId, Uuid: msg.Update.Uuid, } log.V(2).Infoln("Sending status update ACK to ", from.String()) if err := driver.send(driver.MasterPid, ackMsg); err != nil { log.Errorf("Failed to send StatusUpdate ACK message: %v\n", err) return } } else { log.V(1).Infoln("Not sending ACK, update is not from slave:", from.String()) } }
// Route puts a message either in the incoming or outgoing queue. // This method is useful for: // 1) routing internal error to callback handlers // 2) testing components without starting remote servers. func (m *MesosMessenger) Route(ctx context.Context, upid *upid.UPID, msg proto.Message) error { // if destination is not self, send to outbound. if !upid.Equal(m.upid) { return m.Send(ctx, upid, msg) } data, err := proto.Marshal(msg) if err != nil { return err } name := getMessageName(msg) return m.tr.Inject(ctx, &Message{upid, name, msg, data}) }
// Send puts a message into the outgoing queue, waiting to be sent. // With buffered channels, this will not block under moderate throughput. // When an error is generated, the error can be communicated by placing // a message on the incoming queue to be handled upstream. func (m *MesosMessenger) Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error { if upid == nil { panic("cannot sent a message to a nil pid") } else if upid.Equal(m.upid) { return fmt.Errorf("Send the message to self") } name := getMessageName(msg) log.V(2).Infof("Sending message %v to %v\n", name, upid) select { case <-ctx.Done(): return ctx.Err() case m.encodingQueue <- &Message{upid, name, msg, nil}: return nil } }
func (driver *MesosSchedulerDriver) handleAuthenticationResult(from *upid.UPID, pbMsg proto.Message) { if driver.status != mesos.Status_DRIVER_RUNNING { log.V(1).Info("ignoring authenticate because driver is not running") return } if !from.Equal(driver.self) { log.Errorf("ignoring authentication result message received from upid '%v'", from) return } if driver.authenticated { // programming error panic("already authenticated") } if driver.masterPid == nil { log.Infoln("ignoring authentication result because master is lost") driver.authenticating.cancel() // cancel any in-progress background attempt // disable future retries until we get a new master driver.reauthenticate = false return } msg := pbMsg.(*mesos.InternalAuthenticationResult) if driver.reauthenticate || !msg.GetCompleted() || driver.masterPid.String() != msg.GetPid() { log.Infof("failed to authenticate with master %v: master changed", driver.masterPid) driver.authenticating.cancel() // cancel any in-progress background authentication driver.reauthenticate = false driver.tryAuthentication() return } if !msg.GetSuccess() { log.Errorf("master %v refused authentication", driver.masterPid) return } driver.authenticated = true go driver.doReliableRegistration(float64(registrationBackoffFactor)) }
// lead master detection callback. func (driver *MesosSchedulerDriver) handleMasterChanged(from *upid.UPID, pbMsg proto.Message) { if driver.status == mesos.Status_DRIVER_ABORTED { log.Info("Ignoring master change because the driver is aborted.") return } else if !from.Equal(driver.self) { log.Errorf("ignoring master changed message received from upid '%v'", from) return } // Reconnect every time a master is detected. if driver.connected { log.V(3).Info("Disconnecting scheduler.") driver.masterPid = nil driver.withScheduler(func(s Scheduler) { s.Disconnected(driver) }) } msg := pbMsg.(*mesos.InternalMasterChangeDetected) master := msg.Master driver.connected = false driver.authenticated = false if master != nil { log.Infof("New master %s detected\n", master.GetPid()) pid, err := upid.Parse(master.GetPid()) if err != nil { panic("Unable to parse Master's PID value.") // this should not happen. } driver.masterPid = pid // save for downstream ops. driver.tryAuthentication() } else { log.Infoln("No master detected.") } }
// statusUpdated expects to be guarded by eventLock func (driver *MesosSchedulerDriver) statusUpdated(from *upid.UPID, pbMsg proto.Message) { msg := pbMsg.(*mesos.StatusUpdateMessage) if driver.status != mesos.Status_DRIVER_RUNNING { log.V(1).Infoln("Ignoring StatusUpdate message, the driver is not running!") return } if !from.Equal(driver.self) { if !driver.connected { log.V(1).Infoln("Ignoring StatusUpdate message, the driver is not connected!") return } if !driver.masterPid.Equal(from) { log.Warningf("ignoring status message because it was sent from '%v' instead of leading master '%v'", from, driver.masterPid) return } } log.V(2).Infof("Received status update from %q status source %q", from.String(), msg.GetPid()) status := msg.Update.GetStatus() // see https://github.com/apache/mesos/blob/master/src/sched/sched.cpp#L887 // If the update does not have a 'uuid', it does not need // acknowledging. However, prior to 0.23.0, the update uuid // was required and always set. We also don't want to ACK updates // that were internally generated. In 0.24.0, we can rely on the // update uuid check here, until then we must still check for // this being sent from the driver (from == UPID()) or from // the master (pid == UPID()). // TODO(vinod): Get rid of this logic in 0.25.0 because master // and slave correctly set task status in 0.24.0. if clearUUID := len(msg.Update.Uuid) == 0 || from.Equal(driver.self) || msg.GetPid() == driver.self.String(); clearUUID { status.Uuid = nil } else { status.Uuid = msg.Update.Uuid } driver.withScheduler(func(s Scheduler) { s.StatusUpdate(driver, status) }) if driver.status == mesos.Status_DRIVER_ABORTED { log.V(1).Infoln("Not sending StatusUpdate ACK, the driver is aborted!") return } // Send StatusUpdate Acknowledgement; see above for the rules. // Only send ACK if udpate was not from this driver and spec'd a UUID; this is compat w/ 0.23+ ackRequired := len(msg.Update.Uuid) > 0 && !from.Equal(driver.self) && msg.GetPid() != driver.self.String() if ackRequired { ackMsg := &mesos.StatusUpdateAcknowledgementMessage{ SlaveId: msg.Update.SlaveId, FrameworkId: driver.frameworkInfo.Id, TaskId: msg.Update.Status.TaskId, Uuid: msg.Update.Uuid, } log.V(2).Infof("Sending ACK for status update %+v to %q", *msg.Update, from.String()) if err := driver.send(driver.masterPid, ackMsg); err != nil { log.Errorf("Failed to send StatusUpdate ACK message: %v", err) return } } else { log.V(2).Infof("Not sending ACK, update is not from slave %q", from.String()) } }