func (task *provisionerTask) startMachine(
	machine *apiprovisioner.Machine,
	provisioningInfo *params.ProvisioningInfo,
	startInstanceParams environs.StartInstanceParams,
) error {
	result, err := task.broker.StartInstance(startInstanceParams)
	if err != nil {
		// If this is a retryable error, we retry once.
		if instance.IsRetryableCreationError(errors.Cause(err)) {
			logger.Infof("retryable error received on start instance - retrying instance creation")
			result, err = task.broker.StartInstance(startInstanceParams)
			if err != nil {
				return task.setErrorStatus("cannot start instance for machine %q after a retry: %v", machine, err)
			}
		} else {
			// Set the state to error, so the machine will be skipped next
			// time until the error is resolved, but don't return an
			// error; just keep going with the other machines.
			return task.setErrorStatus("cannot start instance for machine %q: %v", machine, err)
		}
	}

	inst := result.Instance
	hardware := result.Hardware
	nonce := startInstanceParams.InstanceConfig.MachineNonce
	networks, ifaces, err := task.prepareNetworkAndInterfaces(result.NetworkInfo)
	if err != nil {
		return task.setErrorStatus("cannot prepare network for machine %q: %v", machine, err)
	}
	volumes := volumesToApiserver(result.Volumes)
	volumeAttachments := volumeAttachmentsToApiserver(result.VolumeAttachments)

	// TODO(dimitern) In a newer Provisioner API version, change
	// SetInstanceInfo or add a new method that takes and saves in
	// state all the information available on a network.InterfaceInfo
	// for each interface, so we can later manage interfaces
	// dynamically at run-time.
	err = machine.SetInstanceInfo(inst.Id(), nonce, hardware, networks, ifaces, volumes, volumeAttachments)
	if err != nil && params.IsCodeNotImplemented(err) {
		return fmt.Errorf("cannot provision instance %v for machine %q with networks: not implemented", inst.Id(), machine)
	} else if err == nil {
		logger.Infof(
			"started machine %s as instance %s with hardware %q, networks %v, interfaces %v, volumes %v, volume attachments %v, subnets to zones %v",
			machine, inst.Id(), hardware, networks, ifaces, volumes, volumeAttachments, startInstanceParams.SubnetsToZones,
		)
		return nil
	}

	// We need to stop the instance right away here, set error status and go on.
	task.setErrorStatus("cannot register instance for machine %v: %v", machine, err)
	if err := task.broker.StopInstances(inst.Id()); err != nil {
		// We cannot even stop the instance, log the error and quit.
		return errors.Annotatef(err, "cannot stop instance %q for machine %v", inst.Id(), machine)
	}
	return nil
}
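The retry decision above hinges on instance.IsRetryableCreationError. For context, here is a minimal sketch of how such a sentinel error type can be implemented; the constructor and exact definitions are assumptions for illustration and may differ from the real instance package:

// A sketch of a retryable-creation sentinel error, assuming a dedicated
// error type in the instance package; the exact juju definition may differ.
type RetryableCreationError struct {
	message string
}

func (e RetryableCreationError) Error() string { return e.message }

// NewRetryableCreationError wraps an error message in a retryable error.
func NewRetryableCreationError(message string) *RetryableCreationError {
	return &RetryableCreationError{message}
}

// IsRetryableCreationError reports whether err is a retryable creation
// error; callers typically pass errors.Cause(err) to strip annotations
// added further up the stack.
func IsRetryableCreationError(err error) bool {
	_, ok := err.(*RetryableCreationError)
	return ok
}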
func (s *LxcSuite) TestCreateContainerFailsWithInjectedCreateError(c *gc.C) {
	errorChannel := make(chan error, 1)
	cleanup := mock.PatchCreateTransientErrorInjectionChannel(errorChannel)
	defer cleanup()

	errorChannel <- errors.New("lxc-create error")

	manager := s.makeManager(c, "test")
	_, err := containertesting.CreateContainerTest(c, manager, "1/lxc/0")
	c.Assert(err, gc.NotNil)

	// This should be a retryable error.
	isRetryable := instance.IsRetryableCreationError(errors.Cause(err))
	c.Assert(isRetryable, jc.IsTrue)
	c.Assert(err, gc.ErrorMatches, "lxc container creation failed.*")
}
func (s *LxcSuite) TestCreateContainerFailsWithInjectedStartError(c *gc.C) {
	errorChannel := make(chan error, 1)
	cleanup := mock.PatchStartTransientErrorInjectionChannel(errorChannel)
	defer cleanup()

	// One injected error means the container creation will fail,
	// but the destroy function will clean up the remaining container,
	// resulting in a RetryableCreationError.
	errorChannel <- errors.New("start error")

	manager := s.makeManager(c, "test")
	_, err := containertesting.CreateContainerTest(c, manager, "1/lxc/0")
	c.Assert(err, gc.NotNil)

	// This should be a retryable error.
	isRetryable := instance.IsRetryableCreationError(errors.Cause(err))
	c.Assert(isRetryable, jc.IsTrue)
}
func (s *LxcSuite) TestCreateContainerWithInjectedErrorDestroyFails(c *gc.C) {
	errorChannel := make(chan error, 2)
	cleanup := mock.PatchStartTransientErrorInjectionChannel(errorChannel)
	defer cleanup()

	// Two injected errors mean that the container creation and the
	// subsequent destroy will both fail. This should not result in a
	// RetryableCreationError, as the container was left in an error state.
	errorChannel <- errors.New("create error")
	errorChannel <- errors.New("destroy error")

	manager := s.makeManager(c, "test")
	_, err := containertesting.CreateContainerTest(c, manager, "1/lxc/0")
	c.Assert(err, gc.NotNil)

	// This should not be a retryable error.
	isRetryable := instance.IsRetryableCreationError(errors.Cause(err))
	c.Assert(isRetryable, jc.IsFalse)
}
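All three tests drive failures through the mock package's injection hooks. The following is a plausible sketch of that mechanism, assuming a package-level channel that the container code polls without blocking at the create/start step; the variable and helper names (createErrorInjectionChannel, getInjectedError) are hypothetical:

// A sketch of channel-based error injection, assuming the mock package
// keeps a package-level channel that production code consults at the
// lxc-create step; names here are illustrative, not the real mock API.
var createErrorInjectionChannel chan error

// PatchCreateTransientErrorInjectionChannel installs ch as the injection
// point and returns a cleanup func that restores the previous value.
func PatchCreateTransientErrorInjectionChannel(ch chan error) func() {
	old := createErrorInjectionChannel
	createErrorInjectionChannel = ch
	return func() { createErrorInjectionChannel = old }
}

// getInjectedError returns a queued injected error, or nil without
// blocking when the channel is empty or unset (receiving from a nil
// channel never proceeds, so the default case is taken).
func getInjectedError() error {
	select {
	case err := <-createErrorInjectionChannel:
		return err
	default:
		return nil
	}
}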
func (task *provisionerTask) startMachine(
	machine *apiprovisioner.Machine,
	provisioningInfo *params.ProvisioningInfo,
	startInstanceParams environs.StartInstanceParams,
) error {
	result, err := task.broker.StartInstance(startInstanceParams)
	if err != nil {
		if !instance.IsRetryableCreationError(errors.Cause(err)) {
			// Set the state to error, so the machine will be skipped next
			// time until the error is resolved, but don't return an
			// error; just keep going with the other machines.
			return task.setErrorStatus("cannot start instance for machine %q: %v", machine, err)
		}
		logger.Infof("retryable error received on start instance: %v", err)
		for count := task.retryStartInstanceStrategy.retryCount; count > 0; count-- {
			if task.retryStartInstanceStrategy.retryDelay > 0 {
				select {
				case <-task.catacomb.Dying():
					return task.catacomb.ErrDying()
				case <-time.After(task.retryStartInstanceStrategy.retryDelay):
				}
			}
			result, err = task.broker.StartInstance(startInstanceParams)
			if err == nil {
				break
			}
			// If this was the last attempt and an error was received, set
			// the error status on the machine.
			if count == 1 {
				return task.setErrorStatus("cannot start instance for machine %q: %v", machine, err)
			}
		}
	}

	inst := result.Instance
	hardware := result.Hardware
	nonce := startInstanceParams.InstanceConfig.MachineNonce
	networks, ifaces, err := task.prepareNetworkAndInterfaces(result.NetworkInfo)
	if err != nil {
		return task.setErrorStatus("cannot prepare network for machine %q: %v", machine, err)
	}
	volumes := volumesToApiserver(result.Volumes)
	volumeAttachments := volumeAttachmentsToApiserver(result.VolumeAttachments)

	// TODO(dimitern) In a newer Provisioner API version, change
	// SetInstanceInfo or add a new method that takes and saves in
	// state all the information available on a network.InterfaceInfo
	// for each interface, so we can later manage interfaces
	// dynamically at run-time.
	err = machine.SetInstanceInfo(inst.Id(), nonce, hardware, networks, ifaces, volumes, volumeAttachments)
	if err == nil {
		logger.Infof(
			"started machine %s as instance %s with hardware %q, networks %v, interfaces %v, volumes %v, volume attachments %v, subnets to zones %v",
			machine, inst.Id(), hardware, networks, ifaces, volumes, volumeAttachments, startInstanceParams.SubnetsToZones,
		)
		return nil
	}

	// We need to stop the instance right away here, set error status and go on.
	task.setErrorStatus("cannot register instance for machine %v: %v", machine, err)
	if err := task.broker.StopInstances(inst.Id()); err != nil {
		// We cannot even stop the instance, log the error and quit.
		return errors.Annotatef(err, "cannot stop instance %q for machine %v", inst.Id(), machine)
	}
	return nil
}
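The bounded loop above reads its limits from task.retryStartInstanceStrategy. Here is a minimal sketch of what that strategy value might look like, with field names taken from the accesses in startMachine; the struct name and constructor are assumptions for illustration:

// A sketch of the retry strategy consulted by startMachine; the field
// names match the accesses above, but the constructor is an assumption.
type retryStrategy struct {
	retryDelay time.Duration
	retryCount int
}

// newRetryStrategy returns a strategy that retries count times, waiting
// delay between attempts.
func newRetryStrategy(delay time.Duration, count int) retryStrategy {
	return retryStrategy{retryDelay: delay, retryCount: count}
}

With such a value in place, a task configured with newRetryStrategy(10*time.Second, 3) would reattempt the StartInstance call up to three times, pausing ten seconds between attempts unless the catacomb signals shutdown first.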