Example #1
0
// DoIteration runs one iteration of the main loop to get new images, extract data from them,
// and saves results.
func DoIteration(ReposToLimit RepoSet, authToken string,
	processedImages collector.ImageSet, oldMetadataSet collector.MetadataSet,
	PulledList []collector.ImageMetadataInfo) (currentMetadataSet collector.MetadataSet,
	PulledNew []collector.ImageMetadataInfo) {
	blog.Debug("DoIteration: processedImages is %v", processedImages)
	PulledNew = PulledList
	_ /*tagSlice*/, metadataSlice, currentMetadataSet := collector.GetNewImageMetadata(oldMetadataSet)

	if len(metadataSlice) == 0 {
		blog.Info("No new metadata in this iteration")
		return
	}
	blog.Info("Obtained %d new metadata items in this iteration", len(metadataSlice))
	collector.SaveImageMetadata(metadataSlice)

	// number of images processed for each repository in this iteration
	imageCount := make(map[collector.RepoType]int)

	// Set of repos to stop limiting according to maxImages after this iteration completes.
	StopLimiting := NewRepoSet()

	// processed metadata
	processedMetadata := collector.NewMetadataSet()

	for {
		pulledImages := collector.NewImageSet()
		pullErrorMetadata := collector.NewMetadataSet()
		for _, metadata := range metadataSlice {
			processedMetadata.Insert(metadata)
			if config.FilterRepos && !collector.ReposToProcess[collector.RepoType(metadata.Repo)] {
				continue
			}
			// TODO: need to filter out images from ExcludedRepo also when collecting from local Docker host?
			if collector.ExcludeRepo[collector.RepoType(metadata.Repo)] {
				continue
			}
			if pulledImages[collector.ImageIDType(metadata.Image)] {
				continue
			}
			// TODO: need to consider maxImages limit also when collecting from local Docker host?
			repo := collector.RepoType(metadata.Repo)
			if _, ok := ReposToLimit[repo]; !ok {
				// new repo we haven't seen before; apply maxImages limit to repo
				blog.Info("Starting to apply maxImages limit to repo %s", string(repo))
				ReposToLimit[repo] = true
			}
			if ReposToLimit[repo] && *maxImages > 0 && imageCount[repo] >= *maxImages {
				blog.Info("Max image count %d reached for %s, skipping :%s",
					*maxImages, metadata.Repo, metadata.Tag)
				// stop applying the maxImages limit to repo
				StopLimiting[repo] = true
				continue
			}
			if processedImages[collector.ImageIDType(metadata.Image)] {
				continue
			}

			imageCount[collector.RepoType(metadata.Repo)]++

			// docker pull image
			if !collector.LocalHost {
				err := collector.PullImage(metadata)
				if err != nil {
					// docker pull failed for some reason, possibly a transient failure.
					// So we remove this metadata element from the current and processed sets,
					// and move on to process any remaining metadata elements.
					// In the next iteration, metadata
					// lookup may rediscover this deleted metadata element
					// and treat it as new, thus ensuring that the image pull will be retried.
					// TODO: If the registry is corrupted, this can lead to an infinite
					// loop in which the same image pull keeps getting tried and consistently fails.
					currentMetadataSet.Delete(metadata)
					processedMetadata.Delete(metadata)
					// remember this pull error in order to demote this metadata to the end of the slice.
					pullErrorMetadata.Insert(metadata)
					err = collector.RemoveDanglingImages()
					if err != nil {
						except.Error(err, ": RemoveDanglingImages")
					}
					continue
				}
			}
			PulledNew = append(PulledNew, metadata)
			excess := len(PulledNew) - *removeThresh
			if !collector.LocalHost && *removeThresh > 0 && excess > 0 {
				config.BanyanUpdate("Removing " + strconv.Itoa(excess) + " pulled images")
				collector.RemoveImages(PulledNew[0:excess])
				PulledNew = PulledNew[excess:]
			}
			pulledImages[collector.ImageIDType(metadata.Image)] = true
			if len(pulledImages) == IMAGEBATCH {
				break
			}
		}

		if len(pulledImages) == 0 {
			blog.Info("No pulled images left to process in this iteration")
			config.BanyanUpdate("No pulled images left to process in this iteration")
			break
		}

		// reorder metadataSlice by moving images that couldn't be pulled to the end of the list
		newMDSlice := []collector.ImageMetadataInfo{}
		for _, metadata := range metadataSlice {
			if !pullErrorMetadata.Exists(metadata) {
				newMDSlice = append(newMDSlice, metadata)
			}
		}
		for metadata := range pullErrorMetadata {
			newMDSlice = append(newMDSlice, metadata)
		}
		metadataSlice = newMDSlice

		// get and save image data for all the images in pulledimages
		outMapMap := collector.GetImageAllData(pulledImages)
		collector.SaveImageAllData(outMapMap)
		for imageID := range pulledImages {
			processedImages[imageID] = true
		}
		if e := persistImageList(pulledImages); e != nil {
			except.Error(e, "Failed to persist list of collected images")
		}
		if checkConfigUpdate(false) == true {
			// Config changed, and possibly did so before all current metadata was processed.
			// Thus, remember only the metadata that has already been processed, and forget
			// metadata that has not been processed yet.
			// That way, the next time DoIteration() is entered, the metadata lookup
			// will correctly schedule the forgotten metadata for processing, along with
			// any new metadata.
			currentMetadataSet = processedMetadata
			break
		}
	}

	for repo := range StopLimiting {
		blog.Info("No longer enforcing maxImages limit on repo %s", repo)
		ReposToLimit[repo] = false
	}
	return
}
Example #2
0
// DoIteration runs one iteration of the main loop to get new images, extract data from them,
// and saves results.
func DoIteration(ReposToLimit RepoSet, authToken string,
	processedImages collector.ImageSet, oldMetadataSet collector.MetadataSet,
	PulledList []collector.ImageMetadataInfo) (currentMetadataSet collector.MetadataSet,
	PulledNew []collector.ImageMetadataInfo) {
	blog.Debug("DoIteration: processedImages is %v", processedImages)
	PulledNew = PulledList
	_ /*tagSlice*/, metadataSlice, currentMetadataSet := collector.GetNewImageMetadata(oldMetadataSet)

	if len(metadataSlice) == 0 {
		blog.Info("No new metadata in this iteration")
		return
	}
	blog.Info("Obtained %d new metadata items in this iteration", len(metadataSlice))
	collector.SaveImageMetadata(metadataSlice)

	// number of images processed for each repository in this iteration
	imageCount := make(map[collector.RepoType]int)
	imageToMDMap := collector.GetImageToMDMap(metadataSlice)

	// Set of repos to stop limiting according to maxImages after this iteration completes.
	StopLimiting := NewRepoSet()

	for {
		pulledImages := collector.NewImageSet()
		for _, metadata := range metadataSlice {
			if config.FilterRepos && !collector.ReposToProcess[collector.RepoType(metadata.Repo)] {
				continue
			}
			// TODO: need to filter out images from ExcludedRepo also when collecting from local Docker host?
			if collector.ExcludeRepo[collector.RepoType(metadata.Repo)] {
				continue
			}
			if pulledImages[collector.ImageIDType(metadata.Image)] {
				continue
			}
			// TODO: need to consider maxImages limit also when collecting from local Docker host?
			repo := collector.RepoType(metadata.Repo)
			if _, ok := ReposToLimit[repo]; !ok {
				// new repo we haven't seen before; apply maxImages limit to repo
				blog.Info("Starting to apply maxImages limit to repo %s", string(repo))
				ReposToLimit[repo] = true
			}
			if ReposToLimit[repo] && *maxImages > 0 && imageCount[repo] >= *maxImages {
				blog.Info("Max image count %d reached for %s, skipping :%s",
					*maxImages, metadata.Repo, metadata.Tag)
				// stop applying the maxImages limit to repo
				StopLimiting[repo] = true
				continue
			}
			if processedImages[collector.ImageIDType(metadata.Image)] {
				continue
			}

			imageCount[collector.RepoType(metadata.Repo)]++

			// docker pull image
			if !collector.LocalHost {
				collector.PullImage(metadata)
			}
			PulledNew = append(PulledNew, metadata)
			if !collector.LocalHost && *removeThresh > 0 && len(PulledNew) > *removeThresh {
				collector.RemoveImages(PulledNew[0:*removeThresh], imageToMDMap)
				PulledNew = PulledNew[*removeThresh:]
			}
			pulledImages[collector.ImageIDType(metadata.Image)] = true
			if len(pulledImages) == IMAGEBATCH {
				break
			}
		}

		if len(pulledImages) == 0 {
			break
		}
		// get and save image data for all the images in pulledimages
		outMapMap := collector.GetImageAllData(pulledImages)
		collector.SaveImageAllData(outMapMap)
		for imageID := range pulledImages {
			processedImages[imageID] = true
		}
		if e := persistImageList(pulledImages); e != nil {
			blog.Error(e, "Failed to persist list of collected images")
		}
		if checkConfigUpdate(false) == true {
			break
		}
	}

	for repo := range StopLimiting {
		blog.Info("No longer enforcing maxImages limit on repo %s", repo)
		ReposToLimit[repo] = false
	}
	return
}