// GetCurrentState determines the current replication state, and the // desired replication level, for every block that is either // retrievable or referenced. // // It determines the current replication state by reading the block index // from every known Keep service. // // It determines the desired replication level by retrieving all // collection manifests in the database (API server). // // It encodes the resulting information in BlockStateMap. func (bal *Balancer) GetCurrentState(c *arvados.Client) error { defer timeMe(bal.Logger, "GetCurrentState")() bal.BlockStateMap = NewBlockStateMap() dd, err := c.DiscoveryDocument() if err != nil { return err } bal.DefaultReplication = dd.DefaultCollectionReplication bal.MinMtime = time.Now().Unix() - dd.BlobSignatureTTL errs := make(chan error, 2+len(bal.KeepServices)) wg := sync.WaitGroup{} // Start one goroutine for each KeepService: retrieve the // index, and add the returned blocks to BlockStateMap. for _, srv := range bal.KeepServices { wg.Add(1) go func(srv *KeepService) { defer wg.Done() bal.logf("%s: retrieve index", srv) idx, err := srv.Index(c, "") if err != nil { errs <- fmt.Errorf("%s: %v", srv, err) return } bal.logf("%s: add %d replicas to map", srv, len(idx)) bal.BlockStateMap.AddReplicas(srv, idx) bal.logf("%s: done", srv) }(srv) } // collQ buffers incoming collections so we can start fetching // the next page without waiting for the current page to // finish processing. (1000 happens to match the page size // used by (*arvados.Client)EachCollection(), but it's OK if // they don't match.) collQ := make(chan arvados.Collection, 1000) // Start a goroutine to process collections. (We could use a // worker pool here, but even with a single worker we already // process collections much faster than we can retrieve them.) wg.Add(1) go func() { defer wg.Done() for coll := range collQ { err := bal.addCollection(coll) if err != nil { errs <- err for range collQ { } return } bal.collScanned++ } }() // Start a goroutine to retrieve all collections from the // Arvados database and send them to collQ for processing. wg.Add(1) go func() { defer wg.Done() err = EachCollection(c, func(coll arvados.Collection) error { collQ <- coll if len(errs) > 0 { // some other GetCurrentState // error happened: no point // getting any more // collections. return fmt.Errorf("") } return nil }, func(done, total int) { bal.logf("collections: %d/%d", done, total) }) close(collQ) if err != nil { errs <- err } }() go func() { // Send a nil error when all goroutines finish. If // this is the first error sent to errs, then // everything worked. wg.Wait() errs <- nil }() return <-errs }