Example #1
0
// getImageList reads the list of previously processed images from the imageList file.
func getImageList(processedImages collector.ImageSet) (e error) {
	f, e := os.Open(*imageList)
	if e != nil {
		except.Warn(e, ": Error in opening", *imageList, ": perhaps a fresh start?")
		return
	}
	defer f.Close()
	r := bufio.NewReader(f)
	data, e := ioutil.ReadAll(r)
	if e != nil {
		except.Error(e, ": Error in reading file ", *imageList)
		return
	}
	for _, str := range strings.Split(string(data), "\n") {
		if len(str) != 0 {
			blog.Debug("Previous image: %s", str)
			processedImages.Insert(collector.ImageIDType(str))
		}
	}
	return
}
Example #2
0
// DoIteration runs one iteration of the main loop to get new images, extract data from them,
// and saves results.
func DoIteration(ReposToLimit RepoSet, authToken string,
	processedImages collector.ImageSet, oldMetadataSet collector.MetadataSet,
	PulledList []collector.ImageMetadataInfo) (currentMetadataSet collector.MetadataSet,
	PulledNew []collector.ImageMetadataInfo) {

	blog.Debug("DoIteration: processedImages is %v", processedImages)
	PulledNew = PulledList
	metadataSlice, currentMetadataSet := collector.GetNewImageMetadata(oldMetadataSet)

	if len(metadataSlice) == 0 {
		blog.Info("No new metadata in this iteration")
		return
	}
	blog.Info("Obtained %d new metadata items in this iteration", len(metadataSlice))
	collector.SaveImageMetadata(metadataSlice)

	// number of images processed for each repository in this iteration
	imageCount := make(map[collector.RepoType]int)

	// Set of repos to stop limiting according to maxImages after this iteration completes.
	StopLimiting := NewRepoSet()

	// processed metadata
	processedMetadata := collector.NewMetadataSet()

	for {
		pulledImages := collector.NewImageSet()
		pulledImagesManifestHash := collector.NewImageSet()
		pullErrorMetadata := collector.NewMetadataSet()
		for index, _ := range metadataSlice {
			metadata := &metadataSlice[index]
			processedMetadata.Insert(*metadata)
			if config.FilterRepos && !collector.ReposToProcess[collector.RepoType(metadata.Repo)] {
				continue
			}
			// TODO: need to filter out images from ExcludedRepo also when collecting from local Docker host?
			if collector.ExcludeRepo[collector.RepoType(metadata.Repo)] {
				continue
			}
			if len(metadata.Image) > 0 && pulledImages.Exists(collector.ImageIDType(metadata.Image)) {
				continue
			}
			if len(metadata.ManifestHash) > 0 &&
				pulledImagesManifestHash.Exists(collector.ImageIDType(metadata.ManifestHash)) {
				continue
			}
			// TODO: need to consider maxImages limit also when collecting from local Docker host?
			repo := collector.RepoType(metadata.Repo)
			if _, ok := ReposToLimit[repo]; !ok {
				// new repo we haven't seen before; apply maxImages limit to repo
				blog.Info("Starting to apply maxImages limit to repo %s", string(repo))
				ReposToLimit[repo] = true
			}
			if ReposToLimit[repo] && *maxImages > 0 && imageCount[repo] >= *maxImages {
				blog.Info("Max image count %d reached for %s, skipping :%s",
					*maxImages, metadata.Repo, metadata.Tag)
				// stop applying the maxImages limit to repo
				StopLimiting[repo] = true
				continue
			}
			if len(metadata.Image) > 0 && processedImages.Exists(collector.ImageIDType(metadata.Image)) {
				continue
			}
			if len(metadata.ManifestHash) > 0 && processedImages.Exists(collector.ImageIDType(metadata.ManifestHash)) {
				continue
			}

			imageCount[collector.RepoType(metadata.Repo)]++

			ImageLenBeforePull := len(metadata.Image)

			// docker pull image
			if !collector.LocalHost {
				err := collector.PullImage(metadata)
				if err != nil {
					// docker pull failed for some reason, possibly a transient failure.
					// So we remove this metadata element from the current and processed sets,
					// and move on to process any remaining metadata elements.
					// In the next iteration, metadata
					// lookup may rediscover this deleted metadata element
					// and treat it as new, thus ensuring that the image pull will be retried.
					// TODO: If the registry is corrupted, this can lead to an infinite
					// loop in which the same image pull keeps getting tried and consistently fails.
					currentMetadataSet.Delete(*metadata)
					processedMetadata.Delete(*metadata)
					// remember this pull error in order to demote this metadata to the end of the slice.
					pullErrorMetadata.Insert(*metadata)
					err = collector.RemoveDanglingImages()
					if err != nil {
						except.Error(err, ": RemoveDanglingImages")
					}
					continue
				}
				updateRepoTagImageID(metadata, oldMetadataSet)
				processedMetadata.Replace(*metadata)
				if ImageLenBeforePull == 0 && len(metadata.Image) > 0 {
					// Docker daemon computed the image ID for us, so now we can record this entry.
					collector.SaveImageMetadata([]collector.ImageMetadataInfo{*metadata})
				}
			}
			PulledNew = append(PulledNew, *metadata)
			excess := len(PulledNew) - *removeThresh
			if !collector.LocalHost && *removeThresh > 0 && excess > 0 {
				config.BanyanUpdate("Removing " + strconv.Itoa(excess) + " pulled images")
				collector.RemoveImages(PulledNew[0:excess])
				PulledNew = PulledNew[excess:]
			}
			blog.Info("Added image %s to pulledImages", metadata.Image)
			pulledImages.Insert(collector.ImageIDType(metadata.Image))
			pulledImagesManifestHash.Insert(collector.ImageIDType(metadata.ManifestHash))
			if len(pulledImages) == IMAGEBATCH {
				break
			}
		}

		if len(pulledImages) == 0 {
			blog.Info("No pulled images left to process in this iteration")
			config.BanyanUpdate("No pulled images left to process in this iteration")
			break
		}

		// reorder metadataSlice by moving images that couldn't be pulled to the end of the list
		newMDSlice := []collector.ImageMetadataInfo{}
		for _, metadata := range metadataSlice {
			if !pullErrorMetadata.Exists(metadata) {
				newMDSlice = append(newMDSlice, metadata)
			}
		}
		for metadata := range pullErrorMetadata {
			newMDSlice = append(newMDSlice, metadata)
		}
		metadataSlice = newMDSlice

		// get and save image data for all the images in pulledimages
		outMapMap := collector.GetImageAllData(pulledImages)
		collector.SaveImageAllData(outMapMap)
		for imageID := range pulledImages {
			processedImages.Insert(imageID)
		}
		for manifestHash := range pulledImagesManifestHash {
			processedImages.Insert(manifestHash)
		}
		if e := persistImageList(pulledImages); e != nil {
			except.Error(e, "Failed to persist list of collected images")
		}
		if e := persistImageManifestHashList(pulledImagesManifestHash); e != nil {
			except.Error(e, "Failed to persist list of collected image manifest hashes")
		}
		if checkConfigUpdate(false) == true {
			// Config changed, and possibly did so before all current metadata was processed.
			// Thus, remember only the metadata that has already been processed, and forget
			// metadata that has not been processed yet.
			// That way, the next time DoIteration() is entered, the metadata lookup
			// will correctly schedule the forgotten metadata for processing, along with
			// any new metadata.
			currentMetadataSet = processedMetadata
			break
		}
	}

	for repo := range StopLimiting {
		blog.Info("No longer enforcing maxImages limit on repo %s", repo)
		ReposToLimit[repo] = false
	}
	return
}