// getImageList reads the list of previously processed images from the imageList file. func getImageList(processedImages collector.ImageSet) (e error) { f, e := os.Open(*imageList) if e != nil { except.Warn(e, ": Error in opening", *imageList, ": perhaps a fresh start?") return } defer f.Close() r := bufio.NewReader(f) data, e := ioutil.ReadAll(r) if e != nil { except.Error(e, ": Error in reading file ", *imageList) return } for _, str := range strings.Split(string(data), "\n") { if len(str) != 0 { blog.Debug("Previous image: %s", str) processedImages.Insert(collector.ImageIDType(str)) } } return }
// DoIteration runs one iteration of the main loop to get new images, extract data from them,
// and saves results.
//
// Parameters:
//   - ReposToLimit: per-repo flag saying whether the maxImages limit applies. Repos not yet
//     present are added with the value true; repos that hit the limit during this call are
//     set to false just before returning.
//   - authToken: not referenced in this function body.
//   - processedImages: set of image IDs / manifest hashes already handled; entries for every
//     image collected here are inserted into it.
//   - oldMetadataSet: metadata known before this call; passed to GetNewImageMetadata and
//     updateRepoTagImageID.
//   - PulledList: metadata of images pulled earlier and still tracked for later removal.
//
// Results:
//   - currentMetadataSet: the set returned by GetNewImageMetadata, minus entries whose pull
//     failed; if a config update is detected it is replaced by the metadata processed so far.
//   - PulledNew: PulledList extended with the metadata pulled in this call, trimmed from the
//     front whenever it grows beyond *removeThresh (non-local mode only).
func DoIteration(ReposToLimit RepoSet, authToken string,
	processedImages collector.ImageSet, oldMetadataSet collector.MetadataSet,
	PulledList []collector.ImageMetadataInfo) (currentMetadataSet collector.MetadataSet,
	PulledNew []collector.ImageMetadataInfo) {
	blog.Debug("DoIteration: processedImages is %v", processedImages)
	PulledNew = PulledList
	// currentMetadataSet is the named result; := only declares metadataSlice here.
	metadataSlice, currentMetadataSet := collector.GetNewImageMetadata(oldMetadataSet)

	if len(metadataSlice) == 0 {
		blog.Info("No new metadata in this iteration")
		return
	}
	blog.Info("Obtained %d new metadata items in this iteration", len(metadataSlice))
	collector.SaveImageMetadata(metadataSlice)

	// number of images processed for each repository in this iteration
	imageCount := make(map[collector.RepoType]int)

	// Set of repos to stop limiting according to maxImages after this iteration completes.
	StopLimiting := NewRepoSet()

	// processed metadata
	processedMetadata := collector.NewMetadataSet()

	// Each pass of this outer loop builds one batch of pulled images, collects and saves
	// their data, and records them as processed. It ends when a pass pulls nothing or
	// when a config update is detected.
	for {
		// Per-batch state: IDs and manifest hashes pulled in this pass, and the metadata
		// whose pull failed in this pass.
		pulledImages := collector.NewImageSet()
		pulledImagesManifestHash := collector.NewImageSet()
		pullErrorMetadata := collector.NewMetadataSet()
		for index, _ := range metadataSlice {
			// Take a pointer into the slice so that updates made by the pull helpers are
			// visible through metadataSlice as well.
			metadata := &metadataSlice[index]
			// Every visited element counts as processed, even if it is skipped below.
			processedMetadata.Insert(*metadata)
			if config.FilterRepos && !collector.ReposToProcess[collector.RepoType(metadata.Repo)] {
				continue
			}
			// TODO: need to filter out images from ExcludedRepo also when collecting from local Docker host?
			if collector.ExcludeRepo[collector.RepoType(metadata.Repo)] {
				continue
			}
			// Skip anything already pulled in the current batch (by image ID or manifest hash).
			if len(metadata.Image) > 0 && pulledImages.Exists(collector.ImageIDType(metadata.Image)) {
				continue
			}
			if len(metadata.ManifestHash) > 0 && pulledImagesManifestHash.Exists(collector.ImageIDType(metadata.ManifestHash)) {
				continue
			}
			// TODO: need to consider maxImages limit also when collecting from local Docker host?
			repo := collector.RepoType(metadata.Repo)
			if _, ok := ReposToLimit[repo]; !ok {
				// new repo we haven't seen before; apply maxImages limit to repo
				blog.Info("Starting to apply maxImages limit to repo %s", string(repo))
				ReposToLimit[repo] = true
			}
			if ReposToLimit[repo] && *maxImages > 0 && imageCount[repo] >= *maxImages {
				blog.Info("Max image count %d reached for %s, skipping :%s", *maxImages, metadata.Repo, metadata.Tag)
				// stop applying the maxImages limit to repo
				StopLimiting[repo] = true
				continue
			}
			// Skip anything already handled in an earlier batch or an earlier run.
			if len(metadata.Image) > 0 && processedImages.Exists(collector.ImageIDType(metadata.Image)) {
				continue
			}
			if len(metadata.ManifestHash) > 0 && processedImages.Exists(collector.ImageIDType(metadata.ManifestHash)) {
				continue
			}

			// NOTE(review): the per-repo count is bumped before the pull and is not
			// decremented when the pull fails - confirm that is intended.
			imageCount[collector.RepoType(metadata.Repo)]++

			// Remember whether the image ID was known before pulling, to detect the case
			// where it only becomes available afterwards.
			ImageLenBeforePull := len(metadata.Image)
			// docker pull image
			if !collector.LocalHost {
				err := collector.PullImage(metadata)
				if err != nil {
					// docker pull failed for some reason, possibly a transient failure.
					// So we remove this metadata element from the current and processed sets,
					// and move on to process any remaining metadata elements.
					// In the next iteration, metadata
					// lookup may rediscover this deleted metadata element
					// and treat it as new, thus ensuring that the image pull will be retried.
					// TODO: If the registry is corrupted, this can lead to an infinite
					// loop in which the same image pull keeps getting tried and consistently fails.
					currentMetadataSet.Delete(*metadata)
					processedMetadata.Delete(*metadata)
					// remember this pull error in order to demote this metadata to the end of the slice.
					pullErrorMetadata.Insert(*metadata)
					// A cleanup failure is only logged; processing continues with the next element.
					err = collector.RemoveDanglingImages()
					if err != nil {
						except.Error(err, ": RemoveDanglingImages")
					}
					continue
				}
				updateRepoTagImageID(metadata, oldMetadataSet)
				processedMetadata.Replace(*metadata)
				if ImageLenBeforePull == 0 && len(metadata.Image) > 0 {
					// Docker daemon computed the image ID for us, so now we can record this entry.
					collector.SaveImageMetadata([]collector.ImageMetadataInfo{*metadata})
				}
			}

			PulledNew = append(PulledNew, *metadata)
			// Once more than *removeThresh pulled images are tracked, remove the oldest
			// ones (front of the slice) and stop tracking them. Only applies when not
			// collecting from the local host and when *removeThresh is positive.
			excess := len(PulledNew) - *removeThresh
			if !collector.LocalHost && *removeThresh > 0 && excess > 0 {
				config.BanyanUpdate("Removing " + strconv.Itoa(excess) + " pulled images")
				collector.RemoveImages(PulledNew[0:excess])
				PulledNew = PulledNew[excess:]
			}
			blog.Info("Added image %s to pulledImages", metadata.Image)
			// NOTE(review): both values are inserted without a non-empty check, unlike the
			// Exists lookups above which guard on len > 0 - confirm empty entries are harmless.
			pulledImages.Insert(collector.ImageIDType(metadata.Image))
			pulledImagesManifestHash.Insert(collector.ImageIDType(metadata.ManifestHash))
			// Stop filling the batch once it holds IMAGEBATCH image IDs.
			if len(pulledImages) == IMAGEBATCH {
				break
			}
		}

		if len(pulledImages) == 0 {
			blog.Info("No pulled images left to process in this iteration")
			config.BanyanUpdate("No pulled images left to process in this iteration")
			break
		}

		// reorder metadataSlice by moving images that couldn't be pulled to the end of the list
		newMDSlice := []collector.ImageMetadataInfo{}
		for _, metadata := range metadataSlice {
			if !pullErrorMetadata.Exists(metadata) {
				newMDSlice = append(newMDSlice, metadata)
			}
		}
		// Failed entries are appended in the iteration order of the set.
		for metadata := range pullErrorMetadata {
			newMDSlice = append(newMDSlice, metadata)
		}
		metadataSlice = newMDSlice

		// get and save image data for all the images in pulledimages
		outMapMap := collector.GetImageAllData(pulledImages)
		collector.SaveImageAllData(outMapMap)
		// Mark the whole batch as processed so that later passes and later calls skip it.
		for imageID := range pulledImages {
			processedImages.Insert(imageID)
		}
		for manifestHash := range pulledImagesManifestHash {
			processedImages.Insert(manifestHash)
		}
		// Persistence failures are logged but do not stop the iteration.
		if e := persistImageList(pulledImages); e != nil {
			except.Error(e, "Failed to persist list of collected images")
		}
		if e := persistImageManifestHashList(pulledImagesManifestHash); e != nil {
			except.Error(e, "Failed to persist list of collected image manifest hashes")
		}
		if checkConfigUpdate(false) == true {
			// Config changed, and possibly did so before all current metadata was processed.
			// Thus, remember only the metadata that has already been processed, and forget
			// metadata that has not been processed yet.
			// That way, the next time DoIteration() is entered, the metadata lookup
			// will correctly schedule the forgotten metadata for processing, along with
			// any new metadata.
			currentMetadataSet = processedMetadata
			break
		}
	}
	// Repos that reached maxImages during this call are exempted from the limit from now on.
	for repo := range StopLimiting {
		blog.Info("No longer enforcing maxImages limit on repo %s", repo)
		ReposToLimit[repo] = false
	}
	// Bare return: yields the named results currentMetadataSet and PulledNew.
	return
}