Example #1
// Etcd is a configuration stored in etcd. It will be reloaded as soon
// as it changes.
func Etcd(path string, endpoints []string) Source {

	updates := make(chan pair, 1)
	req := make(chan context.Context)

	go func() {
		// Connect to etcd, retrying with exponential backoff until it succeeds.
		var c client.Client
		for i := 0; ; i++ {
			var err error
			c, err = client.New(client.Config{Endpoints: endpoints})
			if err != nil {
				log.Errorf("configuration: cannot connect to etcd: %v", err)
				// Non-blocking send: updates has a buffer of one and is not
				// drained until the first request arrives, so a plain send
				// here would deadlock after repeated connection failures.
				select {
				case updates <- pair{err: err}:
				default:
				}
				time.Sleep(timeutil.Backoff(1*time.Second, 60*time.Second, i))
				continue
			}
			break
		}
		log.V(2).Infof("configuration: connected to etcd")
		kapi := client.NewKeysAPI(c)

		// The first request is served with a one-off Get; every later
		// request is answered from the watcher below.
		r, err := kapi.Get(<-req, path, nil)
		if err != nil {
			updates <- pair{err: err}
		} else {
			updates <- pair{data: []byte(r.Node.Value)}
		}

		// Watch the key and forward every change (or error) to the caller.
		w := kapi.Watcher(path, nil)

		for i := 0; ; i++ {
			ctx := <-req
			r, err := w.Next(ctx)
			if err != nil {
				updates <- pair{err: err}
				time.Sleep(timeutil.Backoff(1*time.Second, 60*time.Second, i))
				continue
			}
			updates <- pair{data: []byte(r.Node.Value)}
		}

	}()

	// The returned Source hands the caller's context to the goroutine and
	// blocks until the next update (or error) arrives.
	return func(ctx context.Context) (data []byte, err error) {
		req <- ctx
		p := <-updates
		return p.data, p.err
	}

}
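The listing does not show the consuming side. Below is a minimal usage sketch, assuming Source is the function type returned above, i.e. func(ctx context.Context) ([]byte, error); the watchConfig helper and the apply callback are hypothetical names, not part of the original code.

// watchConfig blocks on src until the next configuration value (or
// error) arrives and hands every successful update to apply.
// Hypothetical helper; Source's signature is assumed from the code above.
func watchConfig(ctx context.Context, src Source, apply func([]byte)) {
	for {
		data, err := src(ctx)
		if err != nil {
			log.Errorf("configuration: %v", err)
			continue
		}
		apply(data)
	}
}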
Example #2
// performRequests performs a request and returns the duration of the
// shortest refresh interval among all handled resources.
//
// If there's an error, it will be logged, and the returned interval
// will grow exponentially (based on the passed retry number). The
// returned nextRetryNumber should be used in the next call to
// performRequests.
func (client *Client) performRequests(retryNumber int) (interval time.Duration, nextRetryNumber int) {
	// Creates a new GetCapacityRequest.
	in := &pb.GetCapacityRequest{ClientId: proto.String(client.id)}

	// Adds all resources in this client's resource registry to the
	// request.
	for id, resource := range client.resources {
		in.Resource = append(in.Resource, &pb.ResourceRequest{
			Priority:   proto.Int64(resource.priority),
			ResourceId: proto.String(id),
			Wants:      proto.Float64(resource.Wants()),
			Has:        resource.lease,
		})
	}

	if retryNumber > 0 {
		log.Infof("GetCapacity: retry number %v: %v", retryNumber, in)
	}

	out, err := client.getCapacity(in)

	if err != nil {
		log.Errorf("GetCapacityRequest: %v", err)

		// Expired resources only need to be handled if the
		// RPC failed: otherwise the client has gotten a
		// refreshed lease.
		for _, res := range client.resources {
			if res.expires().Before(time.Now()) {
				res.lease = nil
				// FIXME(ryszard): This probably should be the safe
				// capacity instead.
				res.capacity <- 0.0
			}
		}
		return timeutil.Backoff(minBackoff, maxBackoff, retryNumber), retryNumber + 1
	}

	for _, pr := range out.Response {
		res, ok := client.resources[pr.GetResourceId()]

		if !ok {
			log.Errorf("response for non-existing resource: %q", pr.GetResourceId())
			continue
		}

		// -1 is a sentinel meaning there was no previous lease.
		oldCapacity := float64(-1)

		if res.lease != nil {
			oldCapacity = res.lease.GetCapacity()
		}

		res.lease = pr.GetGets()

		// Only send a message down the channel if the capacity has changed.
		if res.lease.GetCapacity() != oldCapacity {
			// res.capacity is a buffered channel, so if no one is
			// receiving on the other side this will send messages
			// over it until it reaches its size, and then will
			// start dropping them.
			select {
			case res.capacity <- res.lease.GetCapacity():
			default:
			}
		}
	}

	// Finds the minimal refresh interval.
	interval = veryLongTime

	for _, res := range client.resources {
		if refresh := time.Duration(res.lease.GetRefreshInterval()) * time.Second; refresh < interval {
			interval = refresh
		}
	}

	// Applies the --minimum_refresh_interval_secs flag.
	if interval < client.conn.Opts.MinimumRefreshInterval {
		log.Infof("overriding interval %v with %v", interval, client.conn.Opts.MinimumRefreshInterval)
		interval = client.conn.Opts.MinimumRefreshInterval
	}

	return interval, 0
}
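The doc comment fixes a contract: sleep for the returned interval, then pass nextRetryNumber back into the next call. A minimal sketch of the calling loop that contract implies is shown below; the run method name is hypothetical.

// run drives performRequests forever: it sleeps for the returned
// interval and feeds nextRetryNumber back into the next call, so
// failures back off exponentially and a success resets the counter.
func (client *Client) run() {
	interval, retries := client.performRequests(0)
	for {
		time.Sleep(interval)
		interval, retries = client.performRequests(retries)
	}
}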
Example #3
// runMasterAware is a wrapper for RPCs whose response may indicate that
// mastership has changed; in that case it reconnects to the new master
// and retries.
func (connection *Connection) runMasterAware(callback func() (HasMastership, error)) (interface{}, error) {
	var (
		err     error
		out     HasMastership
		retries int
	)

	for {
		// Sleeps with exponential backoff before each retry.
		if retries > 0 {
			t := timeutil.Backoff(minBackoff, maxBackoff, retries)
			log.Infof("retry sleep number %d: %v", retries, t)
			time.Sleep(t)
		}

		retries++

		// We goto here when we want to retry the loop without sleeping.
	RetryNoSleep:

		// If there is no current client connection, connect to the original target.
		// If that fails, retry.
		if connection.conn == nil {
			if err := connection.connect(connection.addr); err != nil {
				// The connection failed. Retry.
				continue
			}
		}

		// Calls the callback function that performs an RPC on the master.
		out, err = callback()

		// If an error happened we are going to close the connection to the
		// server. The next iteration will open it again.
		if err != nil {
			connection.Close()
			continue
		}

		// There was no RPC error. Now there are two cases: either the server
		// we talked to was the master and has processed the request, or it
		// was not the master, in which case it tells us who the master is
		// (if it knows). The indicator is the presence of the mastership
		// field in the response.
		mastership := out.GetMastership()

		// If there was no mastership field in the response, the server we
		// talked to was the master and has processed the request, so we can
		// return the response.
		if mastership == nil {
			return out, nil
		}

		// If there was a mastership message, we check it for the presence
		// of the master_address field. If it is absent, the server does not
		// know who the master is, and we need to retry.
		if mastership.MasterAddress == nil {
			log.Warningf("%v is not the master, and does not know who the master is", connection.currentMaster)
			continue
		}

		newMaster := mastership.GetMasterAddress()

		// This should not happen: a server that does not know who the
		// master is should signal that through the absence of the
		// master_address field, but it costs little to check.
		if newMaster == "" {
			log.Errorf("%v sent an empty master address", connection.currentMaster)
			continue
		}

		// The server we talked to told us who the master is. Connect to it.
		connection.connect(newMaster)

		goto RetryNoSleep
	}
}
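For context, an RPC helper built on runMasterAware could look roughly like the sketch below. The stub field, the GetCapacity method, and the assumption that the generated response type implements HasMastership are illustrative guesses, not the repository's actual API.

// GetCapacity issues the RPC through runMasterAware so that mastership
// changes are handled transparently. Sketch only: stub and the pb types
// are hypothetical.
func (connection *Connection) GetCapacity(in *pb.GetCapacityRequest) (*pb.GetCapacityResponse, error) {
	out, err := connection.runMasterAware(func() (HasMastership, error) {
		// Assumes *pb.GetCapacityResponse implements HasMastership.
		return connection.stub.GetCapacity(context.Background(), in)
	})
	if err != nil {
		return nil, err
	}
	return out.(*pb.GetCapacityResponse), nil
}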
Example #4
// performRequests performs a request and returns the duration of the
// shortest refresh interval among all handled resources.
//
// If there's an error, it will be logged, and the returned interval
// will grow exponentially (based on the passed retry number). The
// returned nextRetryNumber should be used in the next call to
// performRequests.
func (server *Server) performRequests(ctx context.Context, retryNumber int) (time.Duration, int) {
	// Creates a new GetServerCapacityRequest.
	in := &pb.GetServerCapacityRequest{ServerId: proto.String(server.ID)}

	server.mu.RLock()

	// Adds all resources in this server's resource registry to the request.
	for id, resource := range server.resources {
		status := resource.Status()

		// For now we do not take into account clients with different
		// priorities, so we form only one PriorityBandAggregate proto. We
		// also include only resources whose wanted capacity is greater than
		// zero, because it makes no sense to ask for zero capacity.
		if status.SumWants > 0 {
			in.Resource = append(in.Resource, &pb.ServerCapacityResourceRequest{
				ResourceId: proto.String(id),
				// TODO(rushanny): fill optional Has field which is of type Lease.
				Wants: []*pb.PriorityBandAggregate{
					{
						// TODO(rushanny): replace defaultPriority with some client's priority.
						Priority:   proto.Int64(int64(defaultPriority)),
						NumClients: proto.Int64(status.Count),
						Wants:      proto.Float64(status.SumWants),
					},
				},
			})
		}
	}

	// If there are no actual resources to ask for, send a default request
	// anyway, just to check the lower-level server's availability.
	if len(server.resources) == 0 {
		in.Resource = append(in.Resource, defaultServerCapacityResourceRequest)
	}
	server.mu.RUnlock()

	if retryNumber > 0 {
		log.Infof("GetServerCapacity: retry number %v: %v\n", retryNumber, in)
	}

	out, err := server.getCapacityRPC(ctx, in)
	if err != nil {
		log.Errorf("GetServerCapacityRequest: %v", err)
		return timeutil.Backoff(minBackoff, maxBackoff, retryNumber), retryNumber + 1
	}

	// Find the minimal refresh interval.
	interval := veryLongTime
	var templates []*pb.ResourceTemplate
	expiryTimes := make(map[string]*time.Time)

	for _, pr := range out.Response {
		_, ok := server.resources[pr.GetResourceId()]
		if !ok {
			log.Errorf("response for non-existing resource: %q", pr.GetResourceId())
			continue
		}

		// Refresh an expiry time for the resource.
		expiryTime := time.Unix(pr.GetGets().GetExpiryTime(), 0)
		expiryTimes[pr.GetResourceId()] = &expiryTime

		// Add a new resource configuration.
		templates = append(templates, &pb.ResourceTemplate{
			IdentifierGlob: proto.String(pr.GetResourceId()),
			Capacity:       proto.Float64(pr.GetGets().GetCapacity()),
			SafeCapacity:   proto.Float64(pr.GetSafeCapacity()),
			Algorithm:      pr.GetAlgorithm(),
		})

		// Find the minimum refresh interval.
		if refresh := time.Duration(pr.GetGets().GetRefreshInterval()) * time.Second; refresh < interval {
			interval = refresh
		}
	}

	// Append the default template for the "*" resource. It should be the
	// last one in templates.
	templates = append(templates, proto.Clone(defaultResourceTemplate).(*pb.ResourceTemplate))

	// Load a new configuration for the resources.
	if err := server.LoadConfig(ctx, &pb.ResourceRepository{
		Resources: templates,
	}, expiryTimes); err != nil {
		log.Errorf("server.LoadConfig: %v", err)
		return timeutil.Backoff(minBackoff, maxBackoff, retryNumber), retryNumber + 1
	}

	// Applies the --minimum_refresh_interval_secs flag. This also replaces
	// interval if it is still the veryLongTime sentinel, i.e. no response
	// carried a refresh interval.
	if interval < server.conn.Opts.MinimumRefreshInterval || interval == veryLongTime {
		log.Infof("overriding interval %v with %v", interval, server.conn.Opts.MinimumRefreshInterval)
		interval = server.conn.Opts.MinimumRefreshInterval
	}

	return interval, 0
}
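All four examples pace their retries with timeutil.Backoff(min, max, retries), whose implementation is not shown. A plausible reading of its contract, based solely on how it is called above, is sketched below; the real helper may differ, for example by adding jitter.

// Backoff is a hedged sketch of the helper used above: it doubles min
// once per retry and clamps the result to max. The d <= 0 check guards
// against overflow of the shift for large retry counts.
func Backoff(min, max time.Duration, retries int) time.Duration {
	d := min << uint(retries)
	if d <= 0 || d > max {
		return max
	}
	return d
}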