// connectFallback opens an API connection using the supplied info,
// or a copy using the fallbackPassword; blocks for up to 5 minutes
// if it encounters a CodeNotProvisioned error, periodically retrying;
// and eventually, having either succeeded, failed, or timed out, returns:
//
//  * (if successful) the connection, and whether the fallback was used
//  * (otherwise) whatever error it most recently encountered
//
// It still has machine-agent concerns baked in, but there's no obvious
// practical path to separating those entirely at the moment.
//
// (The right answer is probably to treat CodeNotProvisioned as a normal
// error and depend on (currently nonexistent) exponential backoff in
// the framework: either it'll work soon enough, or the controller will
// spot the error and nuke the machine anyway. No harm leaving the local
// agent running and occasionally polling for changes -- it won't do much
// until it's managed to log in, and any suicide-cutoff point we pick here
// will be objectively bad in some circumstances.)
func connectFallback(
    apiOpen api.OpenFunc, info *api.Info, fallbackPassword string,
) (
    conn api.Connection, didFallback bool, err error,
) {
    // We expect to assign to `conn`, `err`, *and* `info` in
    // the course of this operation: wrapping this repeated
    // atom in a func currently seems to be less treacherous
    // than the alternatives.
    var tryConnect = func() {
        conn, err = apiOpen(info, api.DialOpts{})
    }

    // Try to connect, trying both the primary and fallback
    // passwords if necessary; and update info, and remember
    // which password we used.
    tryConnect()
    if params.IsCodeUnauthorized(err) {
        // We've perhaps used the wrong password, so
        // try again with the fallback password.
        infoCopy := *info
        info = &infoCopy
        info.Password = fallbackPassword
        didFallback = true
        tryConnect()
    }

    // We might be a machine agent that's started before its
    // provisioner has had a chance to report instance data
    // to the machine; wait a fair while to ensure we really
    // are in the (expected rare) provisioner-crash situation
    // that would cause permanent CodeNotProvisioned (which
    // indicates that the controller has forgotten about us,
    // and is provisioning a new instance, so we really should
    // uninstall).
    //
    // Yes, it's dumb that this can't be interrupted, and that
    // it's not configurable without patching.
    if params.IsCodeNotProvisioned(err) {
        for a := checkProvisionedStrategy.Start(); a.Next(); {
            tryConnect()
            if !params.IsCodeNotProvisioned(err) {
                break
            }
        }
    }

    // At this point we've run out of reasons to retry connecting,
    // and just go with whatever error we last saw (if any).
    if err != nil {
        return nil, false, errors.Trace(err)
    }
    return conn, didFallback, nil
}
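// Usage sketch (added for illustration; not part of the original source).
// It shows how a caller holding API info and the agent's old password might
// drive connectFallback and react to the didFallback result. The helper name
// and its parameters are assumptions, not real identifiers from this codebase.
func exampleConnectWithFallback(info *api.Info, oldPassword string) (api.Connection, error) {
    conn, usedFallback, err := connectFallback(api.Open, info, oldPassword)
    if err != nil {
        return nil, errors.Trace(err)
    }
    if usedFallback {
        // The primary password was rejected; callers typically rotate
        // the credential (compare ScaryConnect below) before proceeding.
    }
    return conn, nil
}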
// pendingOrDeadOrMaintain looks up machines with ids and returns those that
// do not have an instance id assigned yet (pending), those that are dead,
// and those already-provisioned LXC containers that only need maintenance.
func (task *provisionerTask) pendingOrDeadOrMaintain(ids []string) (pending, dead, maintain []*apiprovisioner.Machine, err error) {
    for _, id := range ids {
        machine, found := task.machines[id]
        if !found {
            logger.Infof("machine %q not found", id)
            continue
        }
        switch machine.Life() {
        case params.Dying:
            if _, err := machine.InstanceId(); err == nil {
                continue
            } else if !params.IsCodeNotProvisioned(err) {
                return nil, nil, nil, errors.Annotatef(err, "failed to load machine %q instance id", machine)
            }
            logger.Infof("killing dying, unprovisioned machine %q", machine)
            if err := machine.EnsureDead(); err != nil {
                return nil, nil, nil, errors.Annotatef(err, "failed to ensure machine dead %q", machine)
            }
            fallthrough
        case params.Dead:
            dead = append(dead, machine)
            continue
        }
        if instId, err := machine.InstanceId(); err != nil {
            if !params.IsCodeNotProvisioned(err) {
                logger.Errorf("failed to load machine %q instance id: %v", machine, err)
                continue
            }
            status, _, err := machine.Status()
            if err != nil {
                logger.Infof("cannot get machine %q status: %v", machine, err)
                continue
            }
            if status == params.StatusPending {
                pending = append(pending, machine)
                logger.Infof("found machine %q pending provisioning", machine)
                continue
            }
        } else {
            logger.Infof("machine %v already started as instance %q", machine, instId)
            // InstanceId succeeded, so there is no error to report here;
            // just check whether this is an LXC container, which only
            // needs maintenance.
            isLxc := regexp.MustCompile(`\d+/lxc/\d+`)
            if isLxc.MatchString(machine.Id()) {
                maintain = append(maintain, machine)
            }
        }
    }
    logger.Tracef("pending machines: %v", pending)
    logger.Tracef("dead machines: %v", dead)
    return
}
// refreshMachine refreshes the specified machine's instance ID. If it is set,
// then the machine watcher is stopped and pending entities' parameters are
// updated. If the machine is not provisioned yet, this method is a no-op.
func refreshMachine(ctx *context, tag names.MachineTag) error {
    w, ok := ctx.machines[tag]
    if !ok {
        return errors.Errorf("machine %s is not being watched", tag.Id())
    }
    stopAndRemove := func() error {
        worker.Stop(w)
        delete(ctx.machines, tag)
        return nil
    }
    results, err := ctx.config.Machines.InstanceIds([]names.MachineTag{tag})
    if err != nil {
        return errors.Annotate(err, "getting machine instance ID")
    }
    if err := results[0].Error; err != nil {
        if params.IsCodeNotProvisioned(err) {
            return nil
        } else if params.IsCodeNotFound(err) {
            // Machine is gone, so stop watching.
            return stopAndRemove()
        }
        return errors.Annotate(err, "getting machine instance ID")
    }
    machineProvisioned(ctx, tag, instance.Id(results[0].Result))
    // machine provisioning is the only thing we care about;
    // stop the watcher.
    return stopAndRemove()
}
// RestoreError makes a best effort at converting the given error
// back into an error originally converted by ServerError(). If the
// error could not be converted then false is returned.
func RestoreError(err error) (error, bool) {
    err = errors.Cause(err)

    if apiErr, ok := err.(*params.Error); !ok {
        return err, false
    } else if apiErr == nil {
        return nil, true
    }
    if params.ErrCode(err) == "" {
        return err, false
    }
    msg := err.Error()

    if singleton, ok := singletonError(err); ok {
        return singleton, true
    }

    // TODO(ericsnow) Support the other error types handled by ServerError().
    switch {
    case params.IsCodeUnauthorized(err):
        return errors.NewUnauthorized(nil, msg), true
    case params.IsCodeNotFound(err):
        // TODO(ericsnow) UnknownModelError should be handled here too.
        // ...by parsing msg?
        return errors.NewNotFound(nil, msg), true
    case params.IsCodeAlreadyExists(err):
        return errors.NewAlreadyExists(nil, msg), true
    case params.IsCodeNotAssigned(err):
        return errors.NewNotAssigned(nil, msg), true
    case params.IsCodeHasAssignedUnits(err):
        // TODO(ericsnow) Handle state.HasAssignedUnitsError here.
        // ...by parsing msg?
        return err, false
    case params.IsCodeNoAddressSet(err):
        // TODO(ericsnow) Handle isNoAddressSetError here.
        // ...by parsing msg?
        return err, false
    case params.IsCodeNotProvisioned(err):
        return errors.NewNotProvisioned(nil, msg), true
    case params.IsCodeUpgradeInProgress(err):
        // TODO(ericsnow) Handle state.UpgradeInProgressError here.
        // ...by parsing msg?
        return err, false
    case params.IsCodeMachineHasAttachedStorage(err):
        // TODO(ericsnow) Handle state.HasAttachmentsError here.
        // ...by parsing msg?
        return err, false
    case params.IsCodeNotSupported(err):
        return errors.NewNotSupported(nil, msg), true
    case params.IsBadRequest(err):
        return errors.NewBadRequest(nil, msg), true
    case params.IsMethodNotAllowed(err):
        return errors.NewMethodNotAllowed(nil, msg), true
    case params.ErrCode(err) == params.CodeDischargeRequired:
        // TODO(ericsnow) Handle DischargeRequiredError here.
        return err, false
    default:
        return err, false
    }
}
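// Usage sketch (illustrative, not from the original source): client code that
// receives a *params.Error from the API server can try to map it back onto a
// juju/errors type before testing it. The helper name and variable names are
// assumptions.
func exampleIsNotFound(apiErr error) bool {
    restored, ok := RestoreError(apiErr)
    if !ok {
        // No known mapping; fall back to checking the wire code directly.
        return params.IsCodeNotFound(apiErr)
    }
    return errors.IsNotFound(restored)
}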
func (s *storageAttachmentWatcher) loop() error {
    for {
        select {
        case <-s.catacomb.Dying():
            return s.catacomb.ErrDying()
        case _, ok := <-s.changes:
            if !ok {
                return errors.New("storage attachment watcher closed")
            }
            snapshot, err := getStorageSnapshot(
                s.st, s.storageTag, s.unitTag,
            )
            if params.IsCodeNotFound(err) {
                // The storage attachment was removed
                // from state, so we can stop watching.
                return nil
            } else if params.IsCodeNotProvisioned(err) {
                // We do not care about unattached
                // storage here.
                continue
            } else if err != nil {
                return err
            }
            change := storageAttachmentChange{
                s.storageTag,
                snapshot,
            }
            select {
            case <-s.catacomb.Dying():
                return s.catacomb.ErrDying()
            case s.out <- change:
            }
        }
    }
}
// findUnknownInstances finds instances which are not associated with a machine.
func (task *provisionerTask) findUnknownInstances(stopping []instance.Instance) ([]instance.Instance, error) {
    // Make a copy of the instances we know about.
    instances := make(map[instance.Id]instance.Instance)
    for k, v := range task.instances {
        instances[k] = v
    }

    for _, m := range task.machines {
        instId, err := m.InstanceId()
        switch {
        case err == nil:
            delete(instances, instId)
        case params.IsCodeNotProvisioned(err):
        case params.IsCodeNotFoundOrCodeUnauthorized(err):
        default:
            return nil, err
        }
    }
    // Now remove all those instances that we are stopping already as we
    // know about those and don't want to include them in the unknown list.
    for _, inst := range stopping {
        delete(instances, inst.Id())
    }
    var unknown []instance.Instance
    for _, inst := range instances {
        unknown = append(unknown, inst)
    }
    return unknown, nil
}
// watchStorageAttachment starts watching the storage attachment with
// the specified storage tag, waits for its first event, and records
// the information in the current snapshot.
func (w *RemoteStateWatcher) watchStorageAttachment(
    tag names.StorageTag,
    life params.Life,
    saw watcher.NotifyWatcher,
) error {
    var storageSnapshot StorageSnapshot
    select {
    case <-w.catacomb.Dying():
        return w.catacomb.ErrDying()
    case _, ok := <-saw.Changes():
        if !ok {
            return errors.New("storage attachment watcher closed")
        }
        var err error
        storageSnapshot, err = getStorageSnapshot(w.st, tag, w.unit.Tag())
        if params.IsCodeNotProvisioned(err) {
            // If the storage is unprovisioned, we still want to
            // record the attachment, but we'll mark it as
            // unattached. This allows the uniter to wait for
            // pending storage attachments to be provisioned.
            storageSnapshot = StorageSnapshot{Life: life}
        } else if err != nil {
            return errors.Annotatef(err, "processing initial storage attachment change")
        }
    }
    innerSAW, err := newStorageAttachmentWatcher(
        w.st, saw, w.unit.Tag(), tag, w.storageAttachmentChanges,
    )
    if err != nil {
        return errors.Trace(err)
    }
    w.current.Storage[tag] = storageSnapshot
    w.storageAttachmentWatchers[tag] = innerSAW
    return nil
}
// processAliveFilesystems processes the FilesystemResults for Alive filesystems,
// provisioning filesystems and setting the info in state as necessary.
func processAliveFilesystems(ctx *context, tags []names.FilesystemTag, filesystemResults []params.FilesystemResult) error {
    // Filter out the already-provisioned filesystems.
    pending := make([]names.FilesystemTag, 0, len(tags))
    for i, result := range filesystemResults {
        tag := tags[i]
        if result.Error == nil {
            // Filesystem is already provisioned: skip.
            logger.Debugf("filesystem %q is already provisioned, nothing to do", tag.Id())
            filesystem, err := filesystemFromParams(result.Result)
            if err != nil {
                return errors.Annotate(err, "getting filesystem info")
            }
            ctx.filesystems[tag] = filesystem
            if filesystem.Volume != (names.VolumeTag{}) {
                // Ensure that volume-backed filesystems' block
                // devices are present even after creating the
                // filesystem, so that attachments can be made.
                maybeAddPendingVolumeBlockDevice(ctx, filesystem.Volume)
            }
            continue
        }
        if !params.IsCodeNotProvisioned(result.Error) {
            return errors.Annotatef(
                result.Error, "getting filesystem information for filesystem %q", tag.Id(),
            )
        }
        // The filesystem has not yet been provisioned, so record its tag
        // to enquire about parameters below.
        pending = append(pending, tag)
    }
    if len(pending) == 0 {
        return nil
    }
    paramsResults, err := ctx.filesystemAccessor.FilesystemParams(pending)
    if err != nil {
        return errors.Annotate(err, "getting filesystem params")
    }
    for i, result := range paramsResults {
        if result.Error != nil {
            return errors.Annotate(result.Error, "getting filesystem parameters")
        }
        params, err := filesystemParamsFromParams(result.Result)
        if err != nil {
            return errors.Annotate(err, "getting filesystem parameters")
        }
        ctx.pendingFilesystems[pending[i]] = params
        if params.Volume != (names.VolumeTag{}) {
            // The filesystem is volume-backed: we must watch for
            // the corresponding block device. This will trigger a
            // one-time (for the volume) forced update of block
            // devices. If the block device is not immediately
            // available, then we rely on the watcher. The forced
            // update is necessary in case the block device was
            // added to state already, and we didn't observe it.
            maybeAddPendingVolumeBlockDevice(ctx, params.Volume)
        }
    }
    return nil
}
func classifyMachine(machine ClassifiableMachine) (
    MachineClassification, error) {
    switch machine.Life() {
    case params.Dying:
        if _, err := machine.InstanceId(); err == nil {
            return None, nil
        } else if !params.IsCodeNotProvisioned(err) {
            return None, errors.Annotatef(err, "failed to load dying machine id:%s, details:%v", machine.Id(), machine)
        }
        logger.Infof("killing dying, unprovisioned machine %q", machine)
        if err := machine.EnsureDead(); err != nil {
            return None, errors.Annotatef(err, "failed to ensure machine dead id:%s, details:%v", machine.Id(), machine)
        }
        fallthrough
    case params.Dead:
        return Dead, nil
    }
    if instId, err := machine.InstanceId(); err != nil {
        if !params.IsCodeNotProvisioned(err) {
            return None, errors.Annotatef(err, "failed to load machine id:%s, details:%v", machine.Id(), machine)
        }
        status, _, err := machine.Status()
        if err != nil {
            logger.Infof("cannot get machine id:%s, details:%v, err:%v", machine.Id(), machine, err)
            return None, nil
        }
        if status == params.StatusPending {
            logger.Infof("found machine pending provisioning id:%s, details:%v", machine.Id(), machine)
            return Pending, nil
        }
    } else {
        logger.Infof("machine %s already started as instance %q", machine.Id(), instId)
        // InstanceId succeeded, so there is no error to report here; just
        // check whether this is a container machine, which only needs
        // maintenance.
        isLxc := regexp.MustCompile(`\d+/lxc/\d+`)
        isKvm := regexp.MustCompile(`\d+/kvm/\d+`)
        if isLxc.MatchString(machine.Id()) || isKvm.MatchString(machine.Id()) {
            return Maintain, nil
        }
    }
    return None, nil
}
func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
    // Use a short poll interval when initially waiting for
    // a machine's address and machine agent to start, and a long one when it already
    // has an address and the machine agent is started.
    pollInterval := ShortPoll
    pollInstance := true
    for {
        if pollInstance {
            instInfo, err := pollInstanceInfo(context, m)
            if err != nil && !params.IsCodeNotProvisioned(err) {
                // If the provider doesn't implement Addresses/Status now,
                // it never will until we're upgraded, so don't bother
                // asking any more. We could use less resources
                // by taking down the entire worker, but this is easier for now
                // (and hopefully the local provider will implement
                // Addresses/Status in the not-too-distant future),
                // so we won't need to worry about this case at all.
                if params.IsCodeNotImplemented(err) {
                    pollInterval = 365 * 24 * time.Hour
                } else {
                    return err
                }
            }
            machineStatus := params.StatusPending
            if err == nil {
                if statusInfo, err := m.Status(); err != nil {
                    logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
                } else {
                    machineStatus = statusInfo.Status
                }
            }
            if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted {
                // We've got at least one address and a status and instance is started, so poll infrequently.
                pollInterval = LongPoll
            } else if pollInterval < LongPoll {
                // We have no addresses or not started - poll increasingly rarely
                // until we do.
                pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
            }
            pollInstance = false
        }
        select {
        case <-time.After(pollInterval):
            pollInstance = true
        case <-context.dying():
            return nil
        case <-changed:
            if err := m.Refresh(); err != nil {
                return err
            }
            if m.Life() == params.Dead {
                return nil
            }
        }
    }
}
// pollInstanceInfo checks the current provider addresses and status
// for the given machine's instance, and sets them on the machine if they've changed.
func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) {
    instInfo = instanceInfo{}
    instId, err := m.InstanceId()
    // We can't ask the machine for its addresses if it isn't provisioned yet.
    if params.IsCodeNotProvisioned(err) {
        return instanceInfo{}, err
    }
    if err != nil {
        return instanceInfo{}, errors.Annotate(err, "cannot get machine's instance id")
    }
    instInfo, err = context.instanceInfo(instId)
    if err != nil {
        // TODO (anastasiamac 2016-02-01) This does not look like it needs to be removed now.
        if params.IsCodeNotImplemented(err) {
            return instanceInfo{}, err
        }
        logger.Warningf("cannot get instance info for instance %q: %v", instId, err)
        return instInfo, nil
    }
    if instStat, err := m.InstanceStatus(); err != nil {
        // This should never occur since the machine is provisioned.
        // But just in case, we reset polled status so we try again next time.
        logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err)
        instInfo.status = instance.InstanceStatus{status.Unknown, ""}
    } else {
        // TODO(perrito666) add status validation.
        currentInstStatus := instance.InstanceStatus{
            Status:  status.Status(instStat.Status),
            Message: instStat.Info,
        }
        if instInfo.status != currentInstStatus {
            logger.Infof("machine %q instance status changed from %q to %q", m.Id(), currentInstStatus, instInfo.status)
            if err = m.SetInstanceStatus(instInfo.status.Status, instInfo.status.Message, nil); err != nil {
                logger.Errorf("cannot set instance status on %q: %v", m, err)
                return instanceInfo{}, err
            }
        }
    }
    if m.Life() != params.Dead {
        providerAddresses, err := m.ProviderAddresses()
        if err != nil {
            return instanceInfo{}, err
        }
        if !addressesEqual(providerAddresses, instInfo.addresses) {
            logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses)
            if err := m.SetProviderAddresses(instInfo.addresses...); err != nil {
                logger.Errorf("cannot set addresses on %q: %v", m, err)
                return instanceInfo{}, err
            }
        }
    }
    return instInfo, nil
}
func classifyMachine(machine ClassifiableMachine) (
    MachineClassification, error) {
    switch machine.Life() {
    case params.Dying:
        if _, err := machine.InstanceId(); err == nil {
            return None, nil
        } else if !params.IsCodeNotProvisioned(err) {
            return None, errors.Annotatef(err, "failed to load dying machine id:%s, details:%v", machine.Id(), machine)
        }
        logger.Infof("killing dying, unprovisioned machine %q", machine)
        if err := machine.EnsureDead(); err != nil {
            return None, errors.Annotatef(err, "failed to ensure machine dead id:%s, details:%v", machine.Id(), machine)
        }
        fallthrough
    case params.Dead:
        return Dead, nil
    }
    instId, err := machine.InstanceId()
    if err != nil {
        if !params.IsCodeNotProvisioned(err) {
            return None, errors.Annotatef(err, "failed to load machine id:%s, details:%v", machine.Id(), machine)
        }
        machineStatus, _, err := machine.Status()
        if err != nil {
            logger.Infof("cannot get machine id:%s, details:%v, err:%v", machine.Id(), machine, err)
            return None, nil
        }
        if machineStatus == status.Pending {
            logger.Infof("found machine pending provisioning id:%s, details:%v", machine.Id(), machine)
            return Pending, nil
        }
        return None, nil
    }
    logger.Infof("machine %s already started as instance %q", machine.Id(), instId)
    if state.ContainerTypeFromId(machine.Id()) != "" {
        return Maintain, nil
    }
    return None, nil
}
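// Usage sketch (illustrative, not from the original source): a provisioner-style
// caller might dispatch on the returned MachineClassification like this. The
// helper name and slice names are assumptions.
func exampleClassifyAll(machines []ClassifiableMachine) ([]ClassifiableMachine, []ClassifiableMachine, []ClassifiableMachine, error) {
    var pending, dead, maintain []ClassifiableMachine
    for _, m := range machines {
        class, err := classifyMachine(m)
        if err != nil {
            return nil, nil, nil, errors.Trace(err)
        }
        switch class {
        case Pending:
            pending = append(pending, m)
        case Dead:
            dead = append(dead, m)
        case Maintain:
            maintain = append(maintain, m)
        }
        // None: nothing to do for this machine.
    }
    return pending, dead, maintain, nil
}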
func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
    // Use a short poll interval when initially waiting for
    // a machine's address and machine agent to start, and a long one when it already
    // has an address and the machine agent is started.
    pollInterval := ShortPoll
    pollInstance := true
    for {
        if pollInstance {
            instInfo, err := pollInstanceInfo(context, m)
            if err != nil && !params.IsCodeNotProvisioned(err) {
                return err
            }
            machineStatus := status.StatusPending
            if err == nil {
                if statusInfo, err := m.Status(); err != nil {
                    logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
                } else {
                    machineStatus = statusInfo.Status
                }
            }
            // The extra condition below (checking allocating/pending) is here
            // to improve user experience; without it the instance status will
            // say "pending" for +10 minutes after the agent comes up to "started".
            if instInfo.status.Status != status.StatusAllocating && instInfo.status.Status != status.StatusPending {
                if len(instInfo.addresses) > 0 && machineStatus == status.StatusStarted {
                    // We've got at least one address and a status and instance is started, so poll infrequently.
                    pollInterval = LongPoll
                } else if pollInterval < LongPoll {
                    // We have no addresses or not started - poll increasingly rarely
                    // until we do.
                    pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
                }
            }
            pollInstance = false
        }
        select {
        case <-context.dying():
            return context.errDying()
        case <-time.After(pollInterval):
            // TODO(fwereade): 2016-03-17 lp:1558657
            pollInstance = true
        case <-changed:
            if err := m.Refresh(); err != nil {
                return err
            }
            if m.Life() == params.Dead {
                return nil
            }
        }
    }
}
// refreshVolumeBlockDevices refreshes the block devices for the specified
// volumes.
func refreshVolumeBlockDevices(ctx *context, volumeTags []names.VolumeTag) error {
    machineTag, ok := ctx.config.Scope.(names.MachineTag)
    if !ok {
        // This function should only be called by machine-scoped
        // storage provisioners.
        panic(errors.New("expected machine tag"))
    }
    ids := make([]params.MachineStorageId, len(volumeTags))
    for i, volumeTag := range volumeTags {
        ids[i] = params.MachineStorageId{
            MachineTag:    machineTag.String(),
            AttachmentTag: volumeTag.String(),
        }
    }
    results, err := ctx.config.Volumes.VolumeBlockDevices(ids)
    if err != nil {
        return errors.Annotate(err, "refreshing volume block devices")
    }
    for i, result := range results {
        if result.Error == nil {
            ctx.volumeBlockDevices[volumeTags[i]] = result.Result
            for _, params := range ctx.incompleteFilesystemParams {
                if params.Volume == volumeTags[i] {
                    updatePendingFilesystem(ctx, params)
                }
            }
            for id, params := range ctx.incompleteFilesystemAttachmentParams {
                filesystem, ok := ctx.filesystems[params.Filesystem]
                if !ok {
                    continue
                }
                if filesystem.Volume == volumeTags[i] {
                    updatePendingFilesystemAttachment(ctx, id, params)
                }
            }
        } else if params.IsCodeNotProvisioned(result.Error) || params.IsCodeNotFound(result.Error) {
            // Either the volume (attachment) isn't provisioned,
            // or the corresponding block device is not yet known.
            //
            // Neither of these errors is fatal; we just wait for
            // the block device watcher to notify us again.
        } else {
            // Annotate the per-result error (not the nil call-level err),
            // otherwise the failure would be silently swallowed.
            return errors.Annotatef(
                result.Error, "getting block device info for volume attachment %v", ids[i],
            )
        }
    }
    return nil
}
func openAPIStateUsingInfo(info *api.Info, oldPassword string) (api.Connection, bool, error) {
    // We let the API dial fail immediately because the
    // runner's loop outside the caller of openAPIState will
    // keep on retrying. If we block for ages here,
    // then the worker that's calling this cannot
    // be interrupted.
    st, err := apiOpen(info, api.DialOpts{})
    usedOldPassword := false
    if params.IsCodeUnauthorized(err) {
        // We've perhaps used the wrong password, so
        // try again with the fallback password.
        infoCopy := *info
        info = &infoCopy
        info.Password = oldPassword
        usedOldPassword = true
        st, err = apiOpen(info, api.DialOpts{})
    }
    // The provisioner may take some time to record the agent's
    // machine instance ID, so wait until it does so.
    if params.IsCodeNotProvisioned(err) {
        for a := checkProvisionedStrategy.Start(); a.Next(); {
            st, err = apiOpen(info, api.DialOpts{})
            if !params.IsCodeNotProvisioned(err) {
                break
            }
        }
    }
    if err != nil {
        if params.IsCodeNotProvisioned(err) || params.IsCodeUnauthorized(err) {
            logger.Errorf("agent terminating due to error returned during API open: %v", err)
            return nil, false, worker.ErrTerminateAgent
        }
        return nil, false, err
    }
    return st, usedOldPassword, nil
}
// processAliveFilesystemAttachments processes the FilesystemAttachmentResults
// for Alive filesystem attachments, attaching filesystems and setting the info
// in state as necessary.
func processAliveFilesystemAttachments(
    ctx *context,
    ids []params.MachineStorageId,
    filesystemAttachmentResults []params.FilesystemAttachmentResult,
) error {
    // Filter out the already-attached.
    pending := make([]params.MachineStorageId, 0, len(ids))
    for i, result := range filesystemAttachmentResults {
        if result.Error == nil {
            delete(ctx.pendingFilesystemAttachments, ids[i])
            // Filesystem attachment is already provisioned: if we
            // didn't (re)attach in this session, then we must do
            // so now.
            action := "nothing to do"
            if _, ok := ctx.filesystemAttachments[ids[i]]; !ok {
                // Not yet (re)attached in this session.
                pending = append(pending, ids[i])
                action = "will reattach"
            }
            logger.Debugf(
                "%s is already attached to %s, %s",
                ids[i].AttachmentTag, ids[i].MachineTag, action,
            )
            continue
        }
        if !params.IsCodeNotProvisioned(result.Error) {
            return errors.Annotatef(
                result.Error, "getting information for attachment %v", ids[i],
            )
        }
        // The filesystem has not yet been attached, so
        // record its tag to enquire about parameters below.
        pending = append(pending, ids[i])
    }
    if len(pending) == 0 {
        return nil
    }
    params, err := filesystemAttachmentParams(ctx, pending)
    if err != nil {
        return errors.Trace(err)
    }
    for i, params := range params {
        if params.InstanceId == "" {
            watchMachine(ctx, params.Machine)
        }
        ctx.pendingFilesystemAttachments[pending[i]] = params
    }
    return nil
}
// processAliveVolumeAttachments processes the VolumeAttachmentResults
// for Alive volume attachments, attaching volumes and setting the info
// in state as necessary.
func processAliveVolumeAttachments(
    ctx *context,
    ids []params.MachineStorageId,
    volumeAttachmentResults []params.VolumeAttachmentResult,
) error {
    // Filter out the already-attached.
    pending := make([]params.MachineStorageId, 0, len(ids))
    for i, result := range volumeAttachmentResults {
        if result.Error == nil {
            // Volume attachment is already provisioned: if we
            // didn't (re)attach in this session, then we must
            // do so now.
            action := "nothing to do"
            if _, ok := ctx.volumeAttachments[ids[i]]; !ok {
                // Not yet (re)attached in this session.
                pending = append(pending, ids[i])
                action = "will reattach"
            }
            logger.Debugf(
                "%s is already attached to %s, %s",
                ids[i].AttachmentTag, ids[i].MachineTag, action,
            )
            removePendingVolumeAttachment(ctx, ids[i])
            continue
        }
        if !params.IsCodeNotProvisioned(result.Error) {
            return errors.Annotatef(
                result.Error, "getting information for attachment %v", ids[i],
            )
        }
        // The volume has not yet been provisioned, so record its tag
        // to enquire about parameters below.
        pending = append(pending, ids[i])
    }
    if len(pending) == 0 {
        return nil
    }
    params, err := volumeAttachmentParams(ctx, pending)
    if err != nil {
        return errors.Trace(err)
    }
    for i, params := range params {
        if volume, ok := ctx.volumes[params.Volume]; ok {
            params.VolumeId = volume.VolumeId
        }
        updatePendingVolumeAttachment(ctx, pending[i], params)
    }
    return nil
}
// update is called when hook.SourceChanges are applied.
func (s *storageSource) update() error {
    attachment, err := s.st.StorageAttachment(s.storageTag, s.unitTag)
    if params.IsCodeNotFound(err) {
        // The storage attachment was removed from state, which
        // implies that the storage has been detached already.
        logger.Debugf("storage attachment %q not found", s.storageTag.Id())
        return nil
    } else if params.IsCodeNotProvisioned(err) {
        logger.Debugf("storage attachment %q not provisioned yet", s.storageTag.Id())
        return nil
    } else if err != nil {
        logger.Debugf("error refreshing storage details: %v", err)
        return errors.Annotate(err, "refreshing storage details")
    }
    return s.storageHookQueue.Update(attachment)
}
func machineLoop(context machineContext, m machine, changed <-chan struct{}) error {
    // Use a short poll interval when initially waiting for
    // a machine's address and machine agent to start, and a long one when it already
    // has an address and the machine agent is started.
    pollInterval := ShortPoll
    pollInstance := true
    for {
        if pollInstance {
            instInfo, err := pollInstanceInfo(context, m)
            if err != nil && !params.IsCodeNotProvisioned(err) {
                return err
            }
            machineStatus := params.StatusPending
            if err == nil {
                if statusInfo, err := m.Status(); err != nil {
                    logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err)
                } else {
                    machineStatus = statusInfo.Status
                }
            }
            if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted {
                // We've got at least one address and a status and instance is started, so poll infrequently.
                pollInterval = LongPoll
            } else if pollInterval < LongPoll {
                // We have no addresses or not started - poll increasingly rarely
                // until we do.
                pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff)
            }
            pollInstance = false
        }
        select {
        case <-context.dying():
            return context.errDying()
        case <-time.After(pollInterval):
            pollInstance = true
        case <-changed:
            if err := m.Refresh(); err != nil {
                return err
            }
            if m.Life() == params.Dead {
                return nil
            }
        }
    }
}
// processDeadFilesystems processes the FilesystemResults for Dead filesystems,
// deprovisioning filesystems and removing from state as necessary.
func processDeadFilesystems(ctx *context, tags []names.FilesystemTag, filesystemResults []params.FilesystemResult) error {
    for _, tag := range tags {
        delete(ctx.pendingFilesystems, tag)
    }
    var destroy []names.FilesystemTag
    var remove []names.Tag
    for i, result := range filesystemResults {
        tag := tags[i]
        if result.Error == nil {
            logger.Debugf("filesystem %s is provisioned, queuing for deprovisioning", tag.Id())
            filesystem, err := filesystemFromParams(result.Result)
            if err != nil {
                return errors.Annotate(err, "getting filesystem info")
            }
            ctx.filesystems[tag] = filesystem
            destroy = append(destroy, tag)
            continue
        }
        if params.IsCodeNotProvisioned(result.Error) {
            logger.Debugf("filesystem %s is not provisioned, queuing for removal", tag.Id())
            remove = append(remove, tag)
            continue
        }
        return errors.Annotatef(result.Error, "getting filesystem information for filesystem %s", tag.Id())
    }
    if len(destroy)+len(remove) == 0 {
        return nil
    }
    if len(destroy) > 0 {
        errorResults, err := destroyFilesystems(ctx, destroy)
        if err != nil {
            return errors.Annotate(err, "destroying filesystems")
        }
        for i, tag := range destroy {
            if err := errorResults[i]; err != nil {
                return errors.Annotatef(err, "destroying %s", names.ReadableString(tag))
            }
            remove = append(remove, tag)
        }
    }
    if err := removeEntities(ctx, remove); err != nil {
        return errors.Annotate(err, "removing filesystems from state")
    }
    return nil
}
// processDyingFilesystems processes the FilesystemResults for Dying filesystems,
// removing them from provisioning-pending as necessary, and storing the current
// filesystem info for provisioned filesystems so that attachments may be destroyed.
func processDyingFilesystems(ctx *context, tags []names.FilesystemTag, filesystemResults []params.FilesystemResult) error {
    for _, tag := range tags {
        delete(ctx.pendingFilesystems, tag)
    }
    for i, result := range filesystemResults {
        tag := tags[i]
        if result.Error == nil {
            filesystem, err := filesystemFromParams(result.Result)
            if err != nil {
                return errors.Annotate(err, "getting filesystem info")
            }
            ctx.filesystems[tag] = filesystem
        } else if !params.IsCodeNotProvisioned(result.Error) {
            return errors.Annotatef(result.Error, "getting information for filesystem %s", tag.Id())
        }
    }
    return nil
}
// pollInstanceInfo checks the current provider addresses and status
// for the given machine's instance, and sets them on the machine if they've changed.
func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) {
    instInfo = instanceInfo{}
    instId, err := m.InstanceId()
    // We can't ask the machine for its addresses if it isn't provisioned yet.
    if params.IsCodeNotProvisioned(err) {
        return instInfo, err
    }
    if err != nil {
        return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err)
    }
    instInfo, err = context.instanceInfo(instId)
    if err != nil {
        if params.IsCodeNotImplemented(err) {
            return instInfo, err
        }
        logger.Warningf("cannot get instance info for instance %q: %v", instId, err)
        return instInfo, nil
    }
    currentInstStatus, err := m.InstanceStatus()
    if err != nil {
        // This should never occur since the machine is provisioned.
        // But just in case, we reset polled status so we try again next time.
        logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err)
        instInfo.status = ""
    } else {
        if instInfo.status != currentInstStatus {
            logger.Infof("machine %q instance status changed from %q to %q", m.Id(), currentInstStatus, instInfo.status)
            if err = m.SetInstanceStatus(instInfo.status); err != nil {
                logger.Errorf("cannot set instance status on %q: %v", m, err)
            }
        }
    }
    providerAddresses, err := m.ProviderAddresses()
    if err != nil {
        return instInfo, err
    }
    if !addressesEqual(providerAddresses, instInfo.addresses) {
        logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses)
        if err = m.SetProviderAddresses(instInfo.addresses...); err != nil {
            logger.Errorf("cannot set addresses on %q: %v", m, err)
        }
    }
    return instInfo, err
}
// processAliveFilesystems processes the FilesystemResults for Alive filesystems,
// provisioning filesystems and setting the info in state as necessary.
func processAliveFilesystems(ctx *context, tags []names.FilesystemTag, filesystemResults []params.FilesystemResult) error {
    // Filter out the already-provisioned filesystems.
    pending := make([]names.FilesystemTag, 0, len(tags))
    for i, result := range filesystemResults {
        tag := tags[i]
        if result.Error == nil {
            // Filesystem is already provisioned: skip.
            logger.Debugf("filesystem %q is already provisioned, nothing to do", tag.Id())
            filesystem, err := filesystemFromParams(result.Result)
            if err != nil {
                return errors.Annotate(err, "getting filesystem info")
            }
            updateFilesystem(ctx, filesystem)
            if filesystem.Volume != (names.VolumeTag{}) {
                // Ensure that volume-backed filesystems' block
                // devices are present even after creating the
                // filesystem, so that attachments can be made.
                maybeAddPendingVolumeBlockDevice(ctx, filesystem.Volume)
            }
            continue
        }
        if !params.IsCodeNotProvisioned(result.Error) {
            return errors.Annotatef(
                result.Error, "getting filesystem information for filesystem %q", tag.Id(),
            )
        }
        // The filesystem has not yet been provisioned, so record its tag
        // to enquire about parameters below.
        pending = append(pending, tag)
    }
    if len(pending) == 0 {
        return nil
    }
    params, err := filesystemParams(ctx, pending)
    if err != nil {
        return errors.Annotate(err, "getting filesystem params")
    }
    for _, params := range params {
        updatePendingFilesystem(ctx, params)
    }
    return nil
}
// processDyingVolumeAttachments processes the VolumeAttachmentResults for
// Dying volume attachments, detaching volumes and updating state as necessary.
func processDyingVolumeAttachments(
    ctx *context,
    ids []params.MachineStorageId,
    volumeAttachmentResults []params.VolumeAttachmentResult,
) error {
    for _, id := range ids {
        removePendingVolumeAttachment(ctx, id)
    }
    detach := make([]params.MachineStorageId, 0, len(ids))
    remove := make([]params.MachineStorageId, 0, len(ids))
    for i, result := range volumeAttachmentResults {
        id := ids[i]
        if result.Error == nil {
            detach = append(detach, id)
            continue
        }
        if params.IsCodeNotProvisioned(result.Error) {
            remove = append(remove, id)
            continue
        }
        return errors.Annotatef(result.Error, "getting information for volume attachment %v", id)
    }
    if len(detach) > 0 {
        attachmentParams, err := volumeAttachmentParams(ctx, detach)
        if err != nil {
            return errors.Trace(err)
        }
        ops := make([]scheduleOp, len(attachmentParams))
        for i, p := range attachmentParams {
            ops[i] = &detachVolumeOp{args: p}
        }
        scheduleOperations(ctx, ops...)
    }
    if err := removeAttachments(ctx, remove); err != nil {
        return errors.Annotate(err, "removing attachments from state")
    }
    for _, id := range remove {
        delete(ctx.volumeAttachments, id)
    }
    return nil
}
// processDyingFilesystemAttachments processes the FilesystemAttachmentResults for
// Dying filesystem attachments, detaching filesystems and updating state as necessary.
func processDyingFilesystemAttachments(
    ctx *context,
    ids []params.MachineStorageId,
    filesystemAttachmentResults []params.FilesystemAttachmentResult,
) error {
    if len(ids) == 0 {
        return nil
    }
    for _, id := range ids {
        delete(ctx.pendingFilesystemAttachments, id)
    }
    detach := make([]params.MachineStorageId, 0, len(ids))
    remove := make([]params.MachineStorageId, 0, len(ids))
    for i, result := range filesystemAttachmentResults {
        id := ids[i]
        if result.Error == nil {
            detach = append(detach, id)
            continue
        }
        if params.IsCodeNotProvisioned(result.Error) {
            remove = append(remove, id)
            continue
        }
        return errors.Annotatef(result.Error, "getting information for filesystem attachment %v", id)
    }
    if len(detach) > 0 {
        attachmentParams, err := filesystemAttachmentParams(ctx, detach)
        if err != nil {
            return errors.Trace(err)
        }
        for i, params := range attachmentParams {
            ctx.pendingDyingFilesystemAttachments[detach[i]] = params
        }
    }
    if len(remove) > 0 {
        if err := removeAttachments(ctx, remove); err != nil {
            return errors.Annotate(err, "removing attachments from state")
        }
    }
    return nil
}
// processDyingVolumeAttachments processes the VolumeAttachmentResults for
// Dying volume attachments, detaching volumes and updating state as necessary.
func processDyingVolumeAttachments(
    ctx *context,
    ids []params.MachineStorageId,
    volumeAttachmentResults []params.VolumeAttachmentResult,
) error {
    if len(ids) == 0 {
        return nil
    }
    for _, id := range ids {
        delete(ctx.pendingVolumeAttachments, id)
    }
    detach := make([]params.MachineStorageId, 0, len(ids))
    remove := make([]params.MachineStorageId, 0, len(ids))
    for i, result := range volumeAttachmentResults {
        id := ids[i]
        if result.Error == nil {
            detach = append(detach, id)
            continue
        }
        if params.IsCodeNotProvisioned(result.Error) {
            remove = append(remove, id)
            continue
        }
        return errors.Annotatef(result.Error, "getting information for volume attachment %v", id)
    }
    if len(detach) > 0 {
        attachmentParams, err := volumeAttachmentParams(ctx, detach)
        if err != nil {
            return errors.Trace(err)
        }
        if err := detachVolumes(ctx, attachmentParams); err != nil {
            return errors.Annotate(err, "detaching volumes")
        }
        remove = append(remove, detach...)
    }
    if err := removeAttachments(ctx, remove); err != nil {
        return errors.Annotate(err, "removing attachments from state")
    }
    return nil
}
// processAliveVolumes processes the VolumeResults for Alive volumes,
// provisioning volumes and setting the info in state as necessary.
func processAliveVolumes(ctx *context, tags []names.Tag, volumeResults []params.VolumeResult) error {
    // Filter out the already-provisioned volumes.
    pending := make([]names.VolumeTag, 0, len(tags))
    for i, result := range volumeResults {
        volumeTag := tags[i].(names.VolumeTag)
        if result.Error == nil {
            // Volume is already provisioned: skip.
            logger.Debugf("volume %q is already provisioned, nothing to do", tags[i].Id())
            volume, err := volumeFromParams(result.Result)
            if err != nil {
                return errors.Annotate(err, "getting volume info")
            }
            ctx.volumes[volumeTag] = volume
            delete(ctx.pendingVolumes, volumeTag)
            continue
        }
        if !params.IsCodeNotProvisioned(result.Error) {
            return errors.Annotatef(
                result.Error, "getting volume information for volume %q", tags[i].Id(),
            )
        }
        // The volume has not yet been provisioned, so record its tag
        // to enquire about parameters below.
        pending = append(pending, volumeTag)
    }
    if len(pending) == 0 {
        return nil
    }
    volumeParams, err := volumeParams(ctx, pending)
    if err != nil {
        return errors.Annotate(err, "getting volume params")
    }
    for i, params := range volumeParams {
        if params.Attachment.InstanceId == "" {
            watchMachine(ctx, params.Attachment.Machine)
        }
        ctx.pendingVolumes[pending[i]] = params
    }
    return nil
}
// processDeadVolumes processes the VolumeResults for Dead volumes,
// deprovisioning volumes and removing from state as necessary.
func processDeadVolumes(ctx *context, tags []names.VolumeTag, volumeResults []params.VolumeResult) error {
    for _, tag := range tags {
        removePendingVolume(ctx, tag)
    }
    var destroy []names.VolumeTag
    var remove []names.Tag
    for i, result := range volumeResults {
        tag := tags[i]
        if result.Error == nil {
            logger.Debugf("volume %s is provisioned, queuing for deprovisioning", tag.Id())
            volume, err := volumeFromParams(result.Result)
            if err != nil {
                return errors.Annotate(err, "getting volume info")
            }
            updateVolume(ctx, volume)
            destroy = append(destroy, tag)
            continue
        }
        if params.IsCodeNotProvisioned(result.Error) {
            logger.Debugf("volume %s is not provisioned, queuing for removal", tag.Id())
            remove = append(remove, tag)
            continue
        }
        return errors.Annotatef(result.Error, "getting volume information for volume %s", tag.Id())
    }
    if len(destroy) > 0 {
        ops := make([]scheduleOp, len(destroy))
        for i, tag := range destroy {
            ops[i] = &destroyVolumeOp{tag: tag}
        }
        scheduleOperations(ctx, ops...)
    }
    if err := removeEntities(ctx, remove); err != nil {
        return errors.Annotate(err, "removing volumes from state")
    }
    return nil
}
// ScaryConnect logs into the API using the supplied agent's credentials,
// like OnlyConnect; and then:
//
//  * returns ErrConnectImpossible if the agent entity is dead or
//    unauthorized for all known passwords;
//  * if the agent's config does not specify a model, tries to record the
//    model we just connected to;
//  * replaces insecure credentials with freshly (locally) generated ones
//    (and returns ErrChangedPassword, expecting to be reinvoked);
//  * unconditionally resets the remote-state password to its current value
//    (for what seems like a bad reason).
//
// This is clearly a mess but at least now it's a documented and localized
// mess; it should be used only when making the primary API connection for
// a machine or unit agent running in its own process.
func ScaryConnect(a agent.Agent, apiOpen api.OpenFunc) (_ api.Connection, err error) {
    agentConfig := a.CurrentConfig()
    info, ok := agentConfig.APIInfo()
    if !ok {
        return nil, errors.New("API info not available")
    }
    oldPassword := agentConfig.OldPassword()

    defer func() {
        cause := errors.Cause(err)
        switch {
        case cause == apiagent.ErrDenied:
        case cause == errAgentEntityDead:
        case params.IsCodeUnauthorized(cause):
        case params.IsCodeNotProvisioned(cause):
        default:
            return
        }
        err = ErrConnectImpossible
    }()

    // Start connection...
    conn, usedOldPassword, err := connectFallback(apiOpen, info, oldPassword)
    if err != nil {
        return nil, errors.Trace(err)
    }

    // ...and make sure we close it if anything goes wrong.
    defer func() {
        if err != nil {
            if err := conn.Close(); err != nil {
                logger.Errorf("while closing API connection: %v", err)
            }
        }
    }()

    // Update the agent config if necessary; this should just read the
    // conn's properties, rather than making api calls, so we don't
    // need to think about facades yet.
    maybeSetAgentModelTag(a, conn)

    // newConnFacade is patched out in export_test, because exhaustion.
    // proper config/params struct would be better.
    facade, err := newConnFacade(conn)
    if err != nil {
        return nil, errors.Trace(err)
    }

    // First of all, see if we're dead or removed, which will render
    // any further work pointless.
    entity := agentConfig.Tag()
    life, err := facade.Life(entity)
    if err != nil {
        return nil, errors.Trace(err)
    }
    switch life {
    case apiagent.Alive, apiagent.Dying:
    case apiagent.Dead:
        return nil, errAgentEntityDead
    default:
        return nil, errors.Errorf("unknown life value %q", life)
    }

    // If we need to change the password, it's far cleaner to
    // exit with ErrChangedPassword and depend on the framework
    // for expeditious retry than it is to mess around with those
    // responsibilities in here.
    if usedOldPassword {
        logger.Debugf("changing password...")
        err := changePassword(oldPassword, a, facade)
        if err != nil {
            return nil, errors.Trace(err)
        }
        logger.Debugf("password changed")
        return nil, ErrChangedPassword
    }

    // If we *didn't* need to change the password, we apparently need
    // to reset our password to its current value anyway. Reportedly,
    // a machine agent promoted to controller status might have bad
    // auth data in mongodb, and this "fixes" it... but this is scary,
    // wrong, coincidental duct tape. The RTTD is to make controller-
    // promotion work correctly in the first place.
    //
    // Still, can't fix everything at once.
    if err := facade.SetPassword(entity, info.Password); err != nil {
        return nil, errors.Annotate(err, "can't reset agent password")
    }
    return conn, nil
}
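// Usage sketch (illustrative, not from the original source): the documented
// contract is that ErrChangedPassword means "the agent config now holds fresh
// credentials, reinvoke me", so a caller outside a retrying framework might
// loop as below. The helper name and the retry bound are assumptions.
func exampleScaryConnect(a agent.Agent, apiOpen api.OpenFunc) (api.Connection, error) {
    for attempt := 0; attempt < 3; attempt++ {
        conn, err := ScaryConnect(a, apiOpen)
        if errors.Cause(err) == ErrChangedPassword {
            // Credentials were rotated locally; try again with the new ones.
            continue
        }
        return conn, errors.Trace(err)
    }
    return nil, errors.New("too many password changes while connecting")
}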
func (s *workerSuite) TestWorker(c *gc.C) {
    // Most functionality is already tested in detail - we
    // just need to test that things are wired together
    // correctly.
    s.PatchValue(&ShortPoll, 10*time.Millisecond)
    s.PatchValue(&LongPoll, 10*time.Millisecond)
    s.PatchValue(&gatherTime, 10*time.Millisecond)
    machines, insts := s.setupScenario(c)
    s.State.StartSync()
    w := NewWorker(s.api)
    defer func() {
        c.Assert(worker.Stop(w), gc.IsNil)
    }()

    checkInstanceInfo := func(index int, m machine, expectedStatus string) bool {
        isProvisioned := true
        status, err := m.InstanceStatus()
        if params.IsCodeNotProvisioned(err) {
            isProvisioned = false
        } else {
            c.Assert(err, jc.ErrorIsNil)
        }
        providerAddresses, err := m.ProviderAddresses()
        c.Assert(err, jc.ErrorIsNil)
        return reflect.DeepEqual(providerAddresses, s.addressesForIndex(index)) && (!isProvisioned || status == expectedStatus)
    }

    // Wait for the odd numbered machines in the
    // first half of the machine slice to be given their
    // addresses and status.
    for a := coretesting.LongAttempt.Start(); a.Next(); {
        if !a.HasNext() {
            c.Fatalf("timed out waiting for instance info")
        }
        if machinesSatisfy(c, machines, func(i int, m *apiinstancepoller.Machine) bool {
            if i < len(machines)/2 && i%2 == 1 {
                return checkInstanceInfo(i, m, "running")
            }
            status, err := m.InstanceStatus()
            if i%2 == 0 {
                // Even machines not provisioned yet.
                c.Assert(err, jc.Satisfies, params.IsCodeNotProvisioned)
            } else {
                c.Assert(status, gc.Equals, "")
            }
            stm, err := s.State.Machine(m.Id())
            c.Assert(err, jc.ErrorIsNil)
            return len(stm.Addresses()) == 0
        }) {
            break
        }
    }
    // Now provision the even machines in the first half and watch them get addresses.
    for i := 0; i < len(insts)/2; i += 2 {
        m, err := s.State.Machine(machines[i].Id())
        c.Assert(err, jc.ErrorIsNil)
        err = m.SetProvisioned(insts[i].Id(), "nonce", nil)
        c.Assert(err, jc.ErrorIsNil)
        dummy.SetInstanceAddresses(insts[i], s.addressesForIndex(i))
        dummy.SetInstanceStatus(insts[i], "running")
    }
    for a := coretesting.LongAttempt.Start(); a.Next(); {
        if !a.HasNext() {
            c.Fatalf("timed out waiting for machine instance info")
        }
        if machinesSatisfy(c, machines, func(i int, m *apiinstancepoller.Machine) bool {
            if i < len(machines)/2 {
                return checkInstanceInfo(i, m, "running")
            }
            // Machines in second half still have no addresses, nor status.
            status, err := m.InstanceStatus()
            if i%2 == 0 {
                // Even machines not provisioned yet.
                c.Assert(err, jc.Satisfies, params.IsCodeNotProvisioned)
            } else {
                c.Assert(status, gc.Equals, "")
            }
            stm, err := s.State.Machine(m.Id())
            c.Assert(err, jc.ErrorIsNil)
            return len(stm.Addresses()) == 0
        }) {
            break
        }
    }

    // Provision the remaining machines and check the address and status.
    for i := len(insts) / 2; i < len(insts); i++ {
        if i%2 == 0 {
            m, err := s.State.Machine(machines[i].Id())
            c.Assert(err, jc.ErrorIsNil)
            err = m.SetProvisioned(insts[i].Id(), "nonce", nil)
            c.Assert(err, jc.ErrorIsNil)
        }
        dummy.SetInstanceAddresses(insts[i], s.addressesForIndex(i))
        dummy.SetInstanceStatus(insts[i], "running")
    }
    for a := coretesting.LongAttempt.Start(); a.Next(); {
        if !a.HasNext() {
            c.Fatalf("timed out waiting for machine instance info")
        }
        if machinesSatisfy(c, machines, func(i int, m *apiinstancepoller.Machine) bool {
            return checkInstanceInfo(i, m, "running")
        }) {
            break
        }
    }
}