func countCollections(c *arvados.Client, params arvados.ResourceListParams) (int, error) { var page arvados.CollectionList var zero int params.Limit = &zero err := c.RequestAndDecode(&page, "GET", "arvados/v1/collections", nil, params) return page.ItemsAvailable, err }
// Perform a PUT request at path, with data (as JSON) in the request // body. func (srv *KeepService) put(c *arvados.Client, path string, data interface{}) error { // We'll start a goroutine to do the JSON encoding, so we can // stream it to the http client through a Pipe, rather than // keeping the entire encoded version in memory. jsonR, jsonW := io.Pipe() // errC communicates any encoding errors back to our main // goroutine. errC := make(chan error, 1) go func() { enc := json.NewEncoder(jsonW) errC <- enc.Encode(data) jsonW.Close() }() url := srv.URLBase() + "/" + path req, err := http.NewRequest("PUT", url, ioutil.NopCloser(jsonR)) if err != nil { return fmt.Errorf("building request for %s: %v", url, err) } err = c.DoAndDecode(nil, req) // If there was an error encoding the request body, report // that instead of the response: obviously we won't get a // useful response if our request wasn't properly encoded. if encErr := <-errC; encErr != nil { return fmt.Errorf("encoding data for %s: %v", url, encErr) } return err }
// CheckSanityEarly checks for configuration and runtime errors that // can be detected before GetCurrentState() and ComputeChangeSets() // are called. // // If it returns an error, it is pointless to run GetCurrentState or // ComputeChangeSets: after doing so, the statistics would be // meaningless and it would be dangerous to run any Commit methods. func (bal *Balancer) CheckSanityEarly(c *arvados.Client) error { u, err := c.CurrentUser() if err != nil { return fmt.Errorf("CurrentUser(): %v", err) } if !u.IsActive || !u.IsAdmin { return fmt.Errorf("current user (%s) is not an active admin user", u.UUID) } for _, srv := range bal.KeepServices { if srv.ServiceType == "proxy" { return fmt.Errorf("config error: %s: proxy servers cannot be balanced", srv) } } return nil }
// DiscoverKeepServices sets the list of KeepServices by calling the // API to get a list of all services, and selecting the ones whose // ServiceType is in okTypes. func (bal *Balancer) DiscoverKeepServices(c *arvados.Client, okTypes []string) error { bal.KeepServices = make(map[string]*KeepService) ok := make(map[string]bool) for _, t := range okTypes { ok[t] = true } return c.EachKeepService(func(srv arvados.KeepService) error { if ok[srv.ServiceType] { bal.KeepServices[srv.UUID] = &KeepService{ KeepService: srv, ChangeSet: &ChangeSet{}, } } else { bal.logf("skipping %v with service type %q", srv.UUID, srv.ServiceType) } return nil }) }
// GetCurrentState determines the current replication state, and the // desired replication level, for every block that is either // retrievable or referenced. // // It determines the current replication state by reading the block index // from every known Keep service. // // It determines the desired replication level by retrieving all // collection manifests in the database (API server). // // It encodes the resulting information in BlockStateMap. func (bal *Balancer) GetCurrentState(c *arvados.Client) error { defer timeMe(bal.Logger, "GetCurrentState")() bal.BlockStateMap = NewBlockStateMap() dd, err := c.DiscoveryDocument() if err != nil { return err } bal.DefaultReplication = dd.DefaultCollectionReplication bal.MinMtime = time.Now().Unix() - dd.BlobSignatureTTL errs := make(chan error, 2+len(bal.KeepServices)) wg := sync.WaitGroup{} // Start one goroutine for each KeepService: retrieve the // index, and add the returned blocks to BlockStateMap. for _, srv := range bal.KeepServices { wg.Add(1) go func(srv *KeepService) { defer wg.Done() bal.logf("%s: retrieve index", srv) idx, err := srv.Index(c, "") if err != nil { errs <- fmt.Errorf("%s: %v", srv, err) return } bal.logf("%s: add %d replicas to map", srv, len(idx)) bal.BlockStateMap.AddReplicas(srv, idx) bal.logf("%s: done", srv) }(srv) } // collQ buffers incoming collections so we can start fetching // the next page without waiting for the current page to // finish processing. (1000 happens to match the page size // used by (*arvados.Client)EachCollection(), but it's OK if // they don't match.) collQ := make(chan arvados.Collection, 1000) // Start a goroutine to process collections. (We could use a // worker pool here, but even with a single worker we already // process collections much faster than we can retrieve them.) wg.Add(1) go func() { defer wg.Done() for coll := range collQ { err := bal.addCollection(coll) if err != nil { errs <- err for range collQ { } return } bal.collScanned++ } }() // Start a goroutine to retrieve all collections from the // Arvados database and send them to collQ for processing. wg.Add(1) go func() { defer wg.Done() err = EachCollection(c, func(coll arvados.Collection) error { collQ <- coll if len(errs) > 0 { // some other GetCurrentState // error happened: no point // getting any more // collections. return fmt.Errorf("") } return nil }, func(done, total int) { bal.logf("collections: %d/%d", done, total) }) close(collQ) if err != nil { errs <- err } }() go func() { // Send a nil error when all goroutines finish. If // this is the first error sent to errs, then // everything worked. wg.Wait() errs <- nil }() return <-errs }
// EachCollection calls f once for every readable // collection. EachCollection stops if it encounters an error, such as // f returning a non-nil error. // // The progress function is called periodically with done (number of // times f has been called) and total (number of times f is expected // to be called). func EachCollection(c *arvados.Client, f func(arvados.Collection) error, progress func(done, total int)) error { if progress == nil { progress = func(_, _ int) {} } expectCount, err := countCollections(c, arvados.ResourceListParams{}) if err != nil { return err } limit := 1000 params := arvados.ResourceListParams{ Limit: &limit, Order: "modified_at, uuid", Select: []string{"uuid", "manifest_text", "modified_at", "portable_data_hash", "replication_desired"}, } var last arvados.Collection var filterTime time.Time callCount := 0 for { progress(callCount, expectCount) var page arvados.CollectionList err := c.RequestAndDecode(&page, "GET", "arvados/v1/collections", nil, params) if err != nil { return err } for _, coll := range page.Items { if last.ModifiedAt != nil && *last.ModifiedAt == *coll.ModifiedAt && last.UUID >= coll.UUID { continue } callCount++ err = f(coll) if err != nil { return err } last = coll } if last.ModifiedAt == nil || *last.ModifiedAt == filterTime { if page.ItemsAvailable > len(page.Items) { // TODO: use "mtime=X && UUID>Y" // filters to get all collections with // this timestamp, then use "mtime>X" // to get the next timestamp. return fmt.Errorf("BUG: Received an entire page with the same modified_at timestamp (%v), cannot make progress", filterTime) } break } filterTime = *last.ModifiedAt params.Filters = []arvados.Filter{{ Attr: "modified_at", Operator: ">=", Operand: filterTime, }, { Attr: "uuid", Operator: "!=", Operand: last.UUID, }} } progress(callCount, expectCount) if checkCount, err := countCollections(c, arvados.ResourceListParams{Filters: []arvados.Filter{{ Attr: "modified_at", Operator: "<=", Operand: filterTime}}}); err != nil { return err } else if callCount < checkCount { return fmt.Errorf("Retrieved %d collections with modtime <= T=%q, but server now reports there are %d collections with modtime <= T", callCount, filterTime, checkCount) } return nil }