Example #1
0
// GetCurrentState determines the current replication state, and the
// desired replication level, for every block that is either
// retrievable or referenced.
//
// It determines the current replication state by reading the block index
// from every known Keep service.
//
// It determines the desired replication level by retrieving all
// collection manifests in the database (API server).
//
// It encodes the resulting information in BlockStateMap.
//
// GetCurrentState does not return until every goroutine it started
// has finished, so none of them is still updating bal when the
// caller regains control. If anything failed, the first error
// reported is returned; otherwise the result is nil.
func (bal *Balancer) GetCurrentState(c *arvados.Client) error {
	defer timeMe(bal.Logger, "GetCurrentState")()
	bal.BlockStateMap = NewBlockStateMap()

	dd, err := c.DiscoveryDocument()
	if err != nil {
		return err
	}
	bal.DefaultReplication = dd.DefaultCollectionReplication
	bal.MinMtime = time.Now().Unix() - dd.BlobSignatureTTL

	// Every goroutine tracked by wg sends at most one error: one
	// per KeepService, one for the collection processor, and one
	// for the collection retriever. The buffer is large enough to
	// hold all of them, so a send never blocks, and errors stay
	// queued (and visible via len(errs)) until we collect them
	// after wg.Wait().
	errs := make(chan error, 2+len(bal.KeepServices))
	var wg sync.WaitGroup

	// Start one goroutine for each KeepService: retrieve the
	// index, and add the returned blocks to BlockStateMap.
	for _, srv := range bal.KeepServices {
		wg.Add(1)
		go func(srv *KeepService) {
			defer wg.Done()
			bal.logf("%s: retrieve index", srv)
			idx, err := srv.Index(c, "")
			if err != nil {
				errs <- fmt.Errorf("%s: %v", srv, err)
				return
			}
			bal.logf("%s: add %d replicas to map", srv, len(idx))
			bal.BlockStateMap.AddReplicas(srv, idx)
			bal.logf("%s: done", srv)
		}(srv)
	}

	// collQ buffers incoming collections so we can start fetching
	// the next page without waiting for the current page to
	// finish processing. (1000 happens to match the page size
	// used by (*arvados.Client)EachCollection(), but it's OK if
	// they don't match.)
	collQ := make(chan arvados.Collection, 1000)

	// Start a goroutine to process collections. (We could use a
	// worker pool here, but even with a single worker we already
	// process collections much faster than we can retrieve them.)
	wg.Add(1)
	go func() {
		defer wg.Done()
		for coll := range collQ {
			if err := bal.addCollection(coll); err != nil {
				errs <- err
				// Keep draining collQ so the retriever
				// goroutine is never stuck on a full
				// channel; it stops by itself once it
				// notices the queued error.
				for range collQ {
				}
				return
			}
			bal.collScanned++
		}
	}()

	// Start a goroutine to retrieve all collections from the
	// Arvados database and send them to collQ for processing.
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Use a goroutine-local err rather than assigning to
		// the enclosing function's variable.
		err := EachCollection(c,
			func(coll arvados.Collection) error {
				collQ <- coll
				if len(errs) > 0 {
					// some other GetCurrentState
					// error happened: no point
					// getting any more
					// collections.
					return fmt.Errorf("")
				}
				return nil
			}, func(done, total int) {
				bal.logf("collections: %d/%d", done, total)
			})
		close(collQ)
		if err != nil {
			// When this is the abort placeholder returned
			// above, a real error is already queued ahead
			// of it, so the placeholder is never the one
			// returned to the caller.
			errs <- err
		}
	}()

	// Wait for every goroutine before returning, even on failure,
	// so nothing keeps writing to bal in the background.
	wg.Wait()
	if len(errs) > 0 {
		// All senders have finished; the oldest queued error
		// is the first failure that was reported.
		return <-errs
	}
	return nil
}