func (s *ProvisionerSuite) TestProvisionerSucceedStartInstanceWithInjectedWrappedRetryableCreationError(c *gc.C) { // Set the retry delay to 0, and retry count to 1 to keep tests short s.PatchValue(provisioner.RetryStrategyDelay, 0*time.Second) s.PatchValue(provisioner.RetryStrategyCount, 1) // create the error injection channel errorInjectionChannel := make(chan error, 1) c.Assert(errorInjectionChannel, gc.NotNil) p := s.newEnvironProvisioner(c) defer stop(c, p) // patch the dummy provider error injection channel cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel) defer cleanup() // send the error message once // - instance creation should succeed retryableError := errors.Wrap(errors.New(""), instance.NewRetryableCreationError("container failed to start and was destroyed")) errorInjectionChannel <- retryableError m, err := s.addMachine() c.Assert(err, jc.ErrorIsNil) s.checkStartInstanceNoSecureConnection(c, m) }
// TestProvisionerStopRetryingIfDying checks that stopping the provisioner
// while it is waiting to retry a failed instance start aborts the retry
// loop: the machine stays in the pending state and no provider operations
// are recorded.
func (s *ProvisionerSuite) TestProvisionerStopRetryingIfDying(c *gc.C) {
	// Create the error injection channel and inject
	// a retryable error.
	errorInjectionChannel := make(chan error, 1)

	p := s.newEnvironProvisioner(c)
	// Don't defer the stop. We will manually stop and verify the result.

	// Patch the dummy provider error injection channel.
	cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel)
	defer cleanup()

	retryableError := instance.NewRetryableCreationError("container failed to start and was destroyed")
	errorInjectionChannel <- retryableError

	m, err := s.addMachine()
	c.Assert(err, jc.ErrorIsNil)

	// Give the provisioner time to pick up the machine and hit the
	// injected error before stopping it. NOTE(review): this is
	// timing-sensitive — it assumes ShortWait is long enough for the
	// provisioner to enter its retry wait.
	time.Sleep(coretesting.ShortWait)

	stop(c, p)

	// The machine must still be pending: the retry was abandoned
	// rather than re-attempted after the provisioner died.
	statusInfo, err := m.Status()
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(statusInfo.Status, gc.Equals, state.StatusPending)
	s.checkNoOperations(c)
}
func createContainer( lxcContainer golxc.Container, directory string, networkConfig *container.NetworkConfig, extraCreateArgs, templateParams []string, caCert []byte, ) error { // Generate initial lxc.conf with networking settings. netConfig := generateNetworkConfig(networkConfig) configPath := filepath.Join(directory, "lxc.conf") if err := ioutil.WriteFile(configPath, []byte(netConfig), 0644); err != nil { return errors.Annotatef(err, "failed to write container config %q", configPath) } logger.Tracef("wrote initial config %q for container %q", configPath, lxcContainer.Name()) var err error var execEnv []string = nil var closer func() if caCert != nil { execEnv, closer, err = wgetEnvironment(caCert) if err != nil { return errors.Annotatef(err, "failed to get environment for wget execution") } defer closer() } // Create the container. logger.Debugf("creating lxc container %q", lxcContainer.Name()) logger.Debugf("lxc-create template params: %v", templateParams) if err := lxcContainer.Create(configPath, defaultTemplate, extraCreateArgs, templateParams, execEnv); err != nil { return errors.Wrap(err, instance.NewRetryableCreationError("lxc container creation failed: "+lxcContainer.Name())) } return nil }
func (s *ProvisionerSuite) TestProvisionerFailedStartInstanceWithInjectedCreationError(c *gc.C) { // Set the retry delay to 0, and retry count to 2 to keep tests short s.PatchValue(provisioner.RetryStrategyDelay, 0*time.Second) s.PatchValue(provisioner.RetryStrategyCount, 2) // create the error injection channel errorInjectionChannel := make(chan error, 3) p := s.newEnvironProvisioner(c) defer stop(c, p) // patch the dummy provider error injection channel cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel) defer cleanup() retryableError := instance.NewRetryableCreationError("container failed to start and was destroyed") destroyError := errors.New("container failed to start and failed to destroy: manual cleanup of containers needed") // send the error message three times, because the provisioner will retry twice as patched above. errorInjectionChannel <- retryableError errorInjectionChannel <- retryableError errorInjectionChannel <- destroyError m, err := s.addMachine() c.Assert(err, jc.ErrorIsNil) s.checkNoOperations(c) t0 := time.Now() for time.Since(t0) < coretesting.LongWait { // And check the machine status is set to error. statusInfo, err := m.Status() c.Assert(err, jc.ErrorIsNil) if statusInfo.Status == state.StatusPending { time.Sleep(coretesting.ShortWait) continue } c.Assert(statusInfo.Status, gc.Equals, state.StatusError) // check that the status matches the error message c.Assert(statusInfo.Message, gc.Equals, destroyError.Error()) return } c.Fatal("Test took too long to complete") }
func (s *ProvisionerSuite) TestProvisionerSucceedStartInstanceWithInjectedRetryableCreationError(c *gc.C) { // create the error injection channel errorInjectionChannel := make(chan error, 1) c.Assert(errorInjectionChannel, gc.NotNil) p := s.newEnvironProvisioner(c) defer stop(c, p) // patch the dummy provider error injection channel cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel) defer cleanup() // send the error message once // - instance creation should succeed retryableError := instance.NewRetryableCreationError("container failed to start and was destroyed") errorInjectionChannel <- retryableError m, err := s.addMachine() c.Assert(err, jc.ErrorIsNil) s.checkStartInstanceNoSecureConnection(c, m) }
func (s *ProvisionerSuite) TestProvisionerFailedStartInstanceWithInjectedCreationError(c *gc.C) { // create the error injection channel errorInjectionChannel := make(chan error, 2) p := s.newEnvironProvisioner(c) defer stop(c, p) // patch the dummy provider error injection channel cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel) defer cleanup() retryableError := instance.NewRetryableCreationError("container failed to start and was destroyed") destroyError := errors.New("container failed to start and failed to destroy: manual cleanup of containers needed") // send the error message TWICE, because the provisioner will retry only ONCE errorInjectionChannel <- retryableError errorInjectionChannel <- destroyError m, err := s.addMachine() c.Assert(err, jc.ErrorIsNil) s.checkNoOperations(c) t0 := time.Now() for time.Since(t0) < coretesting.LongWait { // And check the machine status is set to error. statusInfo, err := m.Status() c.Assert(err, jc.ErrorIsNil) if statusInfo.Status == state.StatusPending { time.Sleep(coretesting.ShortWait) continue } c.Assert(statusInfo.Status, gc.Equals, state.StatusError) // check that the status matches the error message c.Assert(statusInfo.Message, gc.Equals, destroyError.Error()) break } }
// CreateContainer creates or clones an LXC container for the machine
// described by instanceConfig. When manager.createWithClone is set the
// container is cloned from a (possibly freshly created) template container;
// otherwise it is created from scratch via lxc-create. In both cases the
// container is then configured for autostart, log mounting, optional
// loopback devices and networking, and finally started. On success it
// returns the instance and its hardware characteristics; on any failure it
// returns an annotated error (retryable when the container failed to start
// but was destroyed cleanly).
func (manager *containerManager) CreateContainer(
	instanceConfig *instancecfg.InstanceConfig,
	series string,
	networkConfig *container.NetworkConfig,
	storageConfig *container.StorageConfig,
) (inst instance.Instance, _ *instance.HardwareCharacteristics, err error) {
	// Check our preconditions. These are programmer errors, hence panic
	// rather than returned errors.
	if manager == nil {
		panic("manager is nil")
	} else if series == "" {
		panic("series not set")
	} else if networkConfig == nil {
		panic("networkConfig is nil")
	} else if storageConfig == nil {
		panic("storageConfig is nil")
	}

	// Log how long the start took. Uses the named results, so it only
	// fires a trace line on the success path (err == nil).
	defer func(start time.Time) {
		if err == nil {
			logger.Tracef("container %q started: %v", inst.Id(), time.Now().Sub(start))
		}
	}(time.Now())

	// Container name is the machine tag, optionally prefixed with the
	// manager's namespace to keep multiple environments apart.
	name := names.NewMachineTag(instanceConfig.MachineId).String()
	if manager.name != "" {
		name = fmt.Sprintf("%s-%s", manager.name, name)
	}

	// Create the cloud-init.
	directory, err := container.NewDirectory(name)
	if err != nil {
		return nil, nil, errors.Annotate(err, "failed to create a directory for the container")
	}
	logger.Tracef("write cloud-init")
	userDataFilename, err := containerinit.WriteUserData(instanceConfig, networkConfig, directory)
	if err != nil {
		return nil, nil, errors.Annotate(err, "failed to write user data")
	}

	var lxcContainer golxc.Container
	if manager.createWithClone {
		// Clone path: make sure a template container exists (created on
		// first use), then clone the new container from it.
		templateContainer, err := EnsureCloneTemplate(
			manager.backingFilesystem,
			series,
			networkConfig,
			instanceConfig.AuthorizedKeys,
			instanceConfig.AptProxySettings,
			instanceConfig.AptMirror,
			instanceConfig.EnableOSRefreshUpdate,
			instanceConfig.EnableOSUpgrade,
			manager.imageURLGetter,
			manager.useAUFS,
		)
		if err != nil {
			return nil, nil, errors.Annotate(err, "failed to retrieve the template to clone")
		}
		templateParams := []string{
			"--debug", // Debug errors in the cloud image
			"--userdata", userDataFilename, // Our groovey cloud-init
			"--hostid", name, // Use the container name as the hostid
		}
		// Snapshot cloning is available on btrfs, or via AUFS as the
		// backing store on other filesystems.
		var extraCloneArgs []string
		if manager.backingFilesystem == Btrfs || manager.useAUFS {
			extraCloneArgs = append(extraCloneArgs, "--snapshot")
		}
		if manager.backingFilesystem != Btrfs && manager.useAUFS {
			extraCloneArgs = append(extraCloneArgs, "--backingstore", "aufs")
		}

		// Serialize clones against concurrent template modification.
		lock, err := AcquireTemplateLock(templateContainer.Name(), "clone")
		if err != nil {
			return nil, nil, errors.Annotate(err, "failed to acquire lock on template")
		}
		defer lock.Unlock()

		// Ensure the run-time effective config of the template
		// container has correctly ordered network settings, otherwise
		// Clone() below will fail. This is needed in case we haven't
		// created a new template now but are reusing an existing one.
		// See LP bug #1414016.
		configPath := containerConfigFilename(templateContainer.Name())
		if _, err := reorderNetworkConfig(configPath); err != nil {
			return nil, nil, errors.Annotate(err, "failed to reorder network settings")
		}

		lxcContainer, err = templateContainer.Clone(name, extraCloneArgs, templateParams)
		if err != nil {
			return nil, nil, errors.Annotate(err, "lxc container cloning failed")
		}
	} else {
		// Note here that the lxcObjectFactory only returns a valid container
		// object, and doesn't actually construct the underlying lxc container on
		// disk.
		lxcContainer = LxcObjectFactory.New(name)
		templateParams := []string{
			"--debug", // Debug errors in the cloud image
			"--userdata", userDataFilename, // Our groovey cloud-init
			"--hostid", name, // Use the container name as the hostid
			"-r", series,
		}
		// When an image URL getter is configured, point the template at
		// the cached image and carry its CA certificate along so the
		// download can be verified.
		var caCert []byte
		if manager.imageURLGetter != nil {
			arch := arch.HostArch()
			imageURL, err := manager.imageURLGetter.ImageURL(instance.LXC, series, arch)
			if err != nil {
				return nil, nil, errors.Annotatef(err, "cannot determine cached image URL")
			}
			templateParams = append(templateParams, "-T", imageURL)
			caCert = manager.imageURLGetter.CACert()
		}
		err = createContainer(
			lxcContainer,
			directory,
			networkConfig,
			nil,
			templateParams,
			caCert,
		)
		if err != nil {
			return nil, nil, errors.Trace(err)
		}
	}

	if err := autostartContainer(name); err != nil {
		return nil, nil, errors.Annotate(err, "failed to configure the container for autostart")
	}
	if err := mountHostLogDir(name, manager.logdir); err != nil {
		return nil, nil, errors.Annotate(err, "failed to mount the directory to log to")
	}
	if storageConfig.AllowMount {
		// Add config to allow loop devices to be mounted inside the container.
		if err := allowLoopbackBlockDevices(name); err != nil {
			return nil, nil, errors.Annotate(err, "failed to configure the container for loopback devices")
		}
	}

	// Update the network settings inside the run-time config of the
	// container (e.g. /var/lib/lxc/<name>/config) before starting it.
	netConfig := generateNetworkConfig(networkConfig)
	if err := updateContainerConfig(name, netConfig); err != nil {
		return nil, nil, errors.Annotate(err, "failed to update network config")
	}
	configPath := containerConfigFilename(name)
	logger.Tracef("updated network config in %q for container %q", configPath, name)

	// Ensure the run-time config of the new container has correctly
	// ordered network settings, otherwise Start() below will fail. We
	// need this now because after lxc-create or lxc-clone the initial
	// lxc.conf generated inside createContainer gets merged with
	// other settings (e.g. system-wide overrides, changes made by
	// hooks, etc.) and the result can still be incorrectly ordered.
	// See LP bug #1414016.
	if _, err := reorderNetworkConfig(configPath); err != nil {
		return nil, nil, errors.Annotate(err, "failed to reorder network settings")
	}

	// To speed-up the initial container startup we pre-render the
	// /etc/network/interfaces directly inside the rootfs. This won't
	// work if we use AUFS snapshots, so it's disabled if useAUFS is
	// true (for now).
	if networkConfig != nil && len(networkConfig.Interfaces) > 0 {
		interfacesFile := filepath.Join(LxcContainerDir, name, "rootfs", etcNetworkInterfaces)
		if manager.useAUFS {
			logger.Tracef("not pre-rendering %q when using AUFS-backed rootfs", interfacesFile)
		} else {
			data, err := containerinit.GenerateNetworkConfig(networkConfig)
			if err != nil {
				return nil, nil, errors.Annotatef(err, "failed to generate %q", interfacesFile)
			}
			if err := utils.AtomicWriteFile(interfacesFile, []byte(data), 0644); err != nil {
				return nil, nil, errors.Annotatef(err, "cannot write generated %q", interfacesFile)
			}
			logger.Tracef("pre-rendered network config in %q", interfacesFile)
		}
	}

	// Start the lxc container with the appropriate settings for
	// grabbing the console output and a log file.
	consoleFile := filepath.Join(directory, "console.log")
	lxcContainer.SetLogFile(filepath.Join(directory, "container.log"), golxc.LogDebug)
	logger.Tracef("start the container")
	// We explicitly don't pass through the config file to the container.Start
	// method as we have passed it through at container creation time. This
	// is necessary to get the appropriate rootfs reference without explicitly
	// setting it ourselves.
	if err = lxcContainer.Start("", consoleFile); err != nil {
		logger.Warningf("container failed to start %v", err)
		// if the container fails to start we should try to destroy it
		// check if the container has been constructed
		if lxcContainer.IsConstructed() {
			// if so, then we need to destroy the leftover container
			if derr := lxcContainer.Destroy(); derr != nil {
				// if an error is reported there is probably a leftover
				// container that the user should clean up manually
				logger.Errorf("container failed to start and failed to destroy: %v", derr)
				return nil, nil, errors.Annotate(err, "container failed to start and failed to destroy: manual cleanup of containers needed")
			}
			logger.Warningf("container failed to start and was destroyed - safe to retry")
			// Destroyed cleanly: mark the error retryable so the
			// provisioner attempts the creation again.
			return nil, nil, errors.Wrap(err, instance.NewRetryableCreationError("container failed to start and was destroyed: "+lxcContainer.Name()))
		}
		logger.Warningf("container failed to start: %v", err)
		return nil, nil, errors.Annotate(err, "container failed to start")
	}

	hardware := &instance.HardwareCharacteristics{
		Arch: &version.Current.Arch,
	}
	return &lxcInstance{lxcContainer, name}, hardware, nil
}