// getImageList reads the list of previously processed images from the imageList file.
func getImageList(processedImages collector.ImageSet) (e error) {
	f, e := os.Open(*imageList)
	if e != nil {
		except.Warn(e, ": Error in opening", *imageList, ": perhaps a fresh start?")
		return
	}
	defer f.Close()
	r := bufio.NewReader(f)
	data, e := ioutil.ReadAll(r)
	if e != nil {
		except.Error(e, ": Error in reading file ", *imageList)
		return
	}
	for _, str := range strings.Split(string(data), "\n") {
		if len(str) != 0 {
			blog.Debug("Previous image: %s", str)
			processedImages[collector.ImageIDType(str)] = true
		}
	}
	return
}
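// Note on the persisted format (inferred from the parsing above, not stated
// elsewhere): the *imageList file is treated as a plain newline-separated
// list of image IDs, and every non-empty line becomes one entry in
// processedImages. The companion writer, persistImageList, is expected to
// emit the same one-ID-per-line layout.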
// getImageManifestHashList reads the list of previously processed images (manifest hashes)
// from the imageList_ManifestHash file.
func getImageManifestHashList(processedImagesManifestHash collector.ImageSet) (e error) {
	filename := *imageList + "_ManifestHash"
	f, e := os.Open(filename)
	if e != nil {
		except.Warn(e, ": Error in opening", filename, ": perhaps a fresh start?")
		return
	}
	defer f.Close()
	r := bufio.NewReader(f)
	data, e := ioutil.ReadAll(r)
	if e != nil {
		except.Error(e, ": Error in reading file ", filename)
		return
	}
	for _, str := range strings.Split(string(data), "\n") {
		if len(str) != 0 {
			blog.Debug("Previous image: %s", str)
			processedImagesManifestHash.Insert(collector.ImageIDType(str))
		}
	}
	return
}
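// The two loaders above are typically run together at startup to rebuild the
// collector's in-memory state before the first iteration. The helper below is
// a hypothetical sketch, not part of this package: restoreProcessedState is an
// invented name, and the sketch assumes collector.NewImageSet() returns an
// empty ImageSet, as it is used elsewhere in this file.
func restoreProcessedState() (processedImages, processedHashes collector.ImageSet) {
	processedImages = collector.NewImageSet()
	processedHashes = collector.NewImageSet()
	// Both loaders treat a missing file as a fresh start and log the error
	// themselves, so the returned errors can reasonably be ignored here.
	_ = getImageList(processedImages)
	_ = getImageManifestHashList(processedHashes)
	return
}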
// DoIteration runs one iteration of the main loop to get new images, extract data from them,
// and save the results.
func DoIteration(ReposToLimit RepoSet, authToken string,
	processedImages collector.ImageSet, oldMetadataSet collector.MetadataSet,
	PulledList []collector.ImageMetadataInfo) (currentMetadataSet collector.MetadataSet,
	PulledNew []collector.ImageMetadataInfo) {

	blog.Debug("DoIteration: processedImages is %v", processedImages)
	PulledNew = PulledList
	_ /*tagSlice*/, metadataSlice, currentMetadataSet := collector.GetNewImageMetadata(oldMetadataSet)
	if len(metadataSlice) == 0 {
		blog.Info("No new metadata in this iteration")
		return
	}
	blog.Info("Obtained %d new metadata items in this iteration", len(metadataSlice))
	collector.SaveImageMetadata(metadataSlice)

	// number of images processed for each repository in this iteration
	imageCount := make(map[collector.RepoType]int)

	// Set of repos to stop limiting according to maxImages after this iteration completes.
	StopLimiting := NewRepoSet()

	// processed metadata
	processedMetadata := collector.NewMetadataSet()

	for {
		pulledImages := collector.NewImageSet()
		pullErrorMetadata := collector.NewMetadataSet()
		for _, metadata := range metadataSlice {
			processedMetadata.Insert(metadata)
			if config.FilterRepos && !collector.ReposToProcess[collector.RepoType(metadata.Repo)] {
				continue
			}
			// TODO: need to filter out images from ExcludedRepo also when collecting from local Docker host?
			if collector.ExcludeRepo[collector.RepoType(metadata.Repo)] {
				continue
			}
			if pulledImages[collector.ImageIDType(metadata.Image)] {
				continue
			}

			// TODO: need to consider maxImages limit also when collecting from local Docker host?
			repo := collector.RepoType(metadata.Repo)
			if _, ok := ReposToLimit[repo]; !ok {
				// new repo we haven't seen before; apply maxImages limit to repo
				blog.Info("Starting to apply maxImages limit to repo %s", string(repo))
				ReposToLimit[repo] = true
			}
			if ReposToLimit[repo] && *maxImages > 0 && imageCount[repo] >= *maxImages {
				blog.Info("Max image count %d reached for %s, skipping: %s",
					*maxImages, metadata.Repo, metadata.Tag)
				// stop applying the maxImages limit to repo
				StopLimiting[repo] = true
				continue
			}
			if processedImages[collector.ImageIDType(metadata.Image)] {
				continue
			}

			imageCount[collector.RepoType(metadata.Repo)]++

			// docker pull image
			if !collector.LocalHost {
				err := collector.PullImage(metadata)
				if err != nil {
					// docker pull failed for some reason, possibly a transient failure.
					// So we remove this metadata element from the current and processed sets,
					// and move on to process any remaining metadata elements.
					// In the next iteration, metadata lookup may rediscover this deleted
					// metadata element and treat it as new, thus ensuring that the image
					// pull will be retried.
					// TODO: If the registry is corrupted, this can lead to an infinite
					// loop in which the same image pull keeps getting tried and consistently fails.
					currentMetadataSet.Delete(metadata)
					processedMetadata.Delete(metadata)
					// remember this pull error in order to demote this metadata to the end of the slice.
					pullErrorMetadata.Insert(metadata)
					err = collector.RemoveDanglingImages()
					if err != nil {
						except.Error(err, ": RemoveDanglingImages")
					}
					continue
				}
			}
			PulledNew = append(PulledNew, metadata)
			excess := len(PulledNew) - *removeThresh
			if !collector.LocalHost && *removeThresh > 0 && excess > 0 {
				config.BanyanUpdate("Removing " + strconv.Itoa(excess) + " pulled images")
				collector.RemoveImages(PulledNew[0:excess])
				PulledNew = PulledNew[excess:]
			}
			pulledImages[collector.ImageIDType(metadata.Image)] = true
			if len(pulledImages) == IMAGEBATCH {
				break
			}
		}

		if len(pulledImages) == 0 {
			blog.Info("No pulled images left to process in this iteration")
			config.BanyanUpdate("No pulled images left to process in this iteration")
			break
		}

		// reorder metadataSlice by moving images that couldn't be pulled to the end of the list
		newMDSlice := []collector.ImageMetadataInfo{}
		for _, metadata := range metadataSlice {
			if !pullErrorMetadata.Exists(metadata) {
				newMDSlice = append(newMDSlice, metadata)
			}
		}
		for metadata := range pullErrorMetadata {
			newMDSlice = append(newMDSlice, metadata)
		}
		metadataSlice = newMDSlice

		// get and save image data for all the images in pulledImages
		outMapMap := collector.GetImageAllData(pulledImages)
		collector.SaveImageAllData(outMapMap)
		for imageID := range pulledImages {
			processedImages[imageID] = true
		}
		if e := persistImageList(pulledImages); e != nil {
			except.Error(e, "Failed to persist list of collected images")
		}
		if checkConfigUpdate(false) {
			// Config changed, and possibly did so before all current metadata was processed.
			// Thus, remember only the metadata that has already been processed, and forget
			// metadata that has not been processed yet.
			// That way, the next time DoIteration() is entered, the metadata lookup
			// will correctly schedule the forgotten metadata for processing, along with
			// any new metadata.
			currentMetadataSet = processedMetadata
			break
		}
	}
	for repo := range StopLimiting {
		blog.Info("No longer enforcing maxImages limit on repo %s", repo)
		ReposToLimit[repo] = false
	}
	return
}
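// Hypothetical driver, for illustration only; this is not the collector's real
// main loop. It sketches how DoIteration's outputs are meant to feed back into
// the next call: currentMetadataSet becomes the next oldMetadataSet (so
// metadata deleted after a failed pull is rediscovered and retried), and
// PulledNew carries the pulled-image history that the removeThresh cleanup
// trims. runCollectorLoop and the interval parameter are invented names, and
// the sketch assumes "time" is imported.
func runCollectorLoop(authToken string, interval time.Duration) {
	reposToLimit := NewRepoSet()
	processedImages := collector.NewImageSet()
	_ = getImageList(processedImages) // a missing file just means a fresh start
	metadataSet := collector.NewMetadataSet()
	var pulledList []collector.ImageMetadataInfo
	for {
		metadataSet, pulledList = DoIteration(reposToLimit, authToken,
			processedImages, metadataSet, pulledList)
		time.Sleep(interval)
	}
}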