Example #1
// compact compacts the etcd store and returns the current revision.
// It will return the current compact time and global revision if no error occurred.
// Note that a failed CAS does not return an error.
func compact(ctx context.Context, client *clientv3.Client, t, rev int64) (int64, int64, error) {
	resp, err := client.KV.Txn(ctx).If(
		clientv3.Compare(clientv3.Version(compactRevKey), "=", t),
	).Then(
		clientv3.OpPut(compactRevKey, strconv.FormatInt(rev, 10)), // Expect side effect: increment Version
	).Else(
		clientv3.OpGet(compactRevKey),
	).Commit()
	if err != nil {
		return t, rev, err
	}

	curRev := resp.Header.Revision

	if !resp.Succeeded {
		curTime := resp.Responses[0].GetResponseRange().Kvs[0].Version
		return curTime, curRev, nil
	}
	curTime := t + 1

	if rev == 0 {
		// We don't compact on bootstrap.
		return curTime, curRev, nil
	}
	if _, err = client.Compact(ctx, rev); err != nil {
		return curTime, curRev, err
	}
	glog.Infof("etcd: compacted rev (%d), endpoints (%v)", rev, client.Endpoints())
	return curTime, curRev, nil
}
Example #2
// WaitEvents waits on a key until it observes the given events and returns the final one.
func WaitEvents(c *clientv3.Client, key string, rev int64, evs []storagepb.Event_EventType) (*storagepb.Event, error) {
	wc := c.Watch(context.Background(), key, clientv3.WithRev(rev))
	if wc == nil {
		return nil, ErrNoWatcher
	}
	return waitEvents(wc, evs), nil
}
Example #3
/*
Sync localdir to the etcd server state.
WARNING: ALL CONTENT OF localdir WILL BE LOST

Returns the revision of the synced state.
*/
func firstSyncEtcDir_v3(prefix string, c *clientv3.Client, localdir string) int64 {
	cleanDir(localdir)

	key, option := prefixToKeyOption(prefix)

	// Get all values
	resp, err := c.Get(context.Background(), key, option, clientv3.WithSort(clientv3.SortByKey, clientv3.SortDescend))
	if err != nil {
		panic(err)
	}

	for _, kv := range resp.Kvs {
		targetPath := keyToLocalPath(strings.TrimPrefix(string(kv.Key), prefix), localdir)
		if targetPath == "" {
			continue
		}
		targetDir := filepath.Dir(targetPath)
		os.MkdirAll(targetDir, DEFAULT_DIRMODE)
		err = ioutil.WriteFile(targetPath, kv.Value, DEFAULT_FILEMODE)
		if err != nil {
			log.Printf("firstSyncEtcDir_v3 error write file '%v': %v\n", targetPath, err)
		}
	}
	return resp.Header.Revision
}
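The prefixToKeyOption and keyToLocalPath helpers used above are not part of this snippet. A minimal sketch of what prefixToKeyOption might look like, purely as an assumption about its behavior (the project's real helper may differ): an empty prefix means "the whole keyspace", anything else is treated as a prefix range.

// Hypothetical sketch of the prefixToKeyOption helper; not taken from the original project.
func prefixToKeyOption(prefix string) (string, clientv3.OpOption) {
	if prefix == "" {
		// read everything, starting from the lowest possible key
		return "\x00", clientv3.WithFromKey()
	}
	return prefix, clientv3.WithPrefix()
}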
Example #4
func WaitPrefixEvents(c *clientv3.Client, prefix string, rev int64, evs []mvccpb.Event_EventType) (*clientv3.Event, error) {
	wc := c.Watch(context.Background(), prefix, clientv3.WithPrefix(), clientv3.WithRev(rev))
	if wc == nil {
		return nil, ErrNoWatcher
	}
	return waitEvents(wc, evs), nil
}
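Both wait helpers delegate to a waitEvents function that the snippets do not include. A plausible sketch, written against the mvccpb types used in Example #4 (Example #2 uses the older storagepb names): consume watch responses until the expected event types have been seen in order, then return the last matched event, or nil if the channel closes first.

// Hypothetical sketch of the waitEvents helper; the original implementation may differ.
func waitEvents(wc clientv3.WatchChan, evs []mvccpb.Event_EventType) *clientv3.Event {
	if len(evs) == 0 {
		return nil
	}
	i := 0
	for wresp := range wc {
		for _, ev := range wresp.Events {
			if ev.Type != evs[i] {
				continue
			}
			i++
			if i == len(evs) {
				return ev
			}
		}
	}
	return nil
}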
Example #5
// waitDeletes efficiently waits until all keys matched by Get(key, opts...) are deleted
func waitDeletes(ctx context.Context, client *v3.Client, key string, opts ...v3.OpOption) error {
	getOpts := []v3.OpOption{v3.WithSort(v3.SortByCreatedRev, v3.SortAscend)}
	getOpts = append(getOpts, opts...)
	resp, err := client.Get(ctx, key, getOpts...)
	maxRev := int64(math.MaxInt64)
	getOpts = append(getOpts, v3.WithRev(0))
	for err == nil {
		for len(resp.Kvs) > 0 {
			i := len(resp.Kvs) - 1
			if resp.Kvs[i].CreateRevision <= maxRev {
				break
			}
			resp.Kvs = resp.Kvs[:i]
		}
		if len(resp.Kvs) == 0 {
			break
		}
		lastKV := resp.Kvs[len(resp.Kvs)-1]
		maxRev = lastKV.CreateRevision
		err = waitDelete(ctx, client, string(lastKV.Key), maxRev)
		if err != nil || len(resp.Kvs) == 1 {
			break
		}
		getOpts = append(getOpts, v3.WithLimit(int64(len(resp.Kvs)-1)))
		resp, err = client.Get(ctx, key, getOpts...)
	}
	return err
}
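In etcd's concurrency code this kind of helper backs lock acquisition: a caller creates its own lease-attached key under a prefix and then waits only for keys created earlier. A hypothetical caller sketch (the name lockPrefix and the use of v3.WithMaxCreateRev are illustrative, not taken from the original code):

// Hypothetical caller: put our own key under the prefix, bound to the session lease,
// then wait for every key created before ours to be deleted.
func lockPrefix(ctx context.Context, client *v3.Client, pfx, myKey string, id v3.LeaseID) error {
	put, err := client.Put(ctx, myKey, "", v3.WithLease(id))
	if err != nil {
		return err
	}
	// exclude our own key by capping the create revision just below ours
	return waitDeletes(ctx, client, pfx,
		v3.WithPrefix(), v3.WithMaxCreateRev(put.Header.Revision-1))
}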
Example #6
func NewWatchProxy(c *clientv3.Client) pb.WatchServer {
	wp := &watchProxy{
		cw:           c.Watcher,
		ctx:          clientv3.WithRequireLeader(c.Ctx()),
		retryLimiter: rate.NewLimiter(rate.Limit(retryPerSecond), retryPerSecond),
		leaderc:      make(chan struct{}),
	}
	wp.ranges = newWatchRanges(wp)
	go func() {
		// a new stream without any open watchers won't catch
		// a lost leader event, so keep a special watch to monitor it
		rev := int64((uint64(1) << 63) - 2)
		for wp.ctx.Err() == nil {
			wch := wp.cw.Watch(wp.ctx, lostLeaderKey, clientv3.WithRev(rev))
			for range wch {
			}
			wp.mu.Lock()
			close(wp.leaderc)
			wp.leaderc = make(chan struct{})
			wp.mu.Unlock()
			wp.retryLimiter.Wait(wp.ctx)
		}
		wp.mu.Lock()
		<-wp.ctx.Done()
		wp.mu.Unlock()
		wp.wg.Wait()
		wp.ranges.stop()
	}()
	return wp
}
Example #7
func getKey(ctx context.Context, client *clientv3.Client, key string) (*clientv3.GetResponse, error) {
	for ctx.Err() == nil {
		if gr, err := client.Get(ctx, key); err == nil {
			return gr, nil
		}
	}
	return nil, ctx.Err()
}
Example #8
func deletePrefix(ctx context.Context, client *clientv3.Client, key string) error {
	for ctx.Err() == nil {
		if _, err := client.Delete(ctx, key, clientv3.WithPrefix()); err == nil {
			return nil
		}
	}
	return ctx.Err()
}
Example #9
// compactor periodically compacts historical versions of keys in etcd.
// It compacts versions that are older than the given interval; in other words, after
// compaction the store only retains the versions written during the last interval, and
// any API call for older versions of keys will return an error.
// Interval is the time interval between each compaction. The first compaction happens after "interval".
func compactor(ctx context.Context, client *clientv3.Client, interval time.Duration) {
	// Technical definitions:
	// We have a special key in etcd defined as *compactRevKey*.
	// compactRevKey's value will be set to the string of last compacted revision.
	// compactRevKey's version will be used as logical time for comparison. The version is referred to as the compact time.
	// Initially, because the key doesn't exist, the compact time (version) is 0.
	//
	// Algorithm:
	// - Compare to see if (local compact_time) = (remote compact_time).
	// - If yes, increment both local and remote compact_time, and do a compaction.
	// - If not, set local to remote compact_time.
	//
	// Technical details/insights:
	//
	// The protocol here is lease based. If one compactor CASes successfully, the others will learn of it when their
	// own CAS fails later and will try again in 10 minutes. If an APIServer crashes, another one will "take over" the lease.
	//
	// For example, in the following diagram, we have a compactor C1 doing compaction in t1, t2. Another compactor C2
	// at t1' (t1 < t1' < t2) would CAS fail, set its known oldRev to rev at t1, and try again in t2' (t2' > t2).
	// If C1 crashed and wouldn't compact at t2, C2 would CAS successfully at t2'.
	//
	//                   oldRev(t2)   curRev(t2)
	//                                  +
	//   oldRev        curRev           |
	//     +             +              |
	//     |             |              |
	//     |             |    t1'       |     t2'
	// +---v-------------v----^---------v------^---->
	//     t0           t1             t2
	//
	// We have the guarantees:
	// - in normal cases, the interval is 10 minutes.
	// - in failover, the interval is >10m and <20m
	//
	// FAQ:
	// - What if time is not accurate? We don't care as long as someone did the compaction. Atomicity is ensured using
	//   etcd API.
	// - What happens under heavy load scenarios? Initially, each apiserver will do only one compaction
	//   every 10 minutes. This is very unlikely to affect, or be affected by, overall server load.

	var compactTime int64
	var rev int64
	var err error
	for {
		select {
		case <-time.After(interval):
		case <-ctx.Done():
			return
		}

		compactTime, rev, err = compact(ctx, client, compactTime, rev)
		if err != nil {
			glog.Errorf("etcd: endpoint (%v) compact failed: %v", client.Endpoints(), err)
			continue
		}
	}
}
Example #10
func waitUpdate(ctx context.Context, client *v3.Client, key string, opts ...v3.OpOption) error {
	cctx, cancel := context.WithCancel(ctx)
	defer cancel()
	wresp, ok := <-client.Watch(cctx, key, opts...)
	if !ok {
		return ctx.Err()
	}
	return wresp.Err()
}
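A typical call pattern, shown here only as a hedged illustration: block until the key is modified at or after a known revision.

// Hypothetical usage sketch for waitUpdate; the wrapper name is illustrative.
func waitKeyChange(ctx context.Context, client *v3.Client, key string, rev int64) error {
	return waitUpdate(ctx, client, key, v3.WithRev(rev))
}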
Example #11
// snapshotToStdout streams a snapshot over stdout
func snapshotToStdout(c *clientv3.Client) {
	// must explicitly fetch first revision since no retry on stdout
	wr := <-c.Watch(context.TODO(), "", clientv3.WithPrefix(), clientv3.WithRev(1))
	if wr.Err() == nil {
		wr.CompactRevision = 1
	}
	if rev := snapshot(os.Stdout, c, wr.CompactRevision+1); rev != 0 {
		err := fmt.Errorf("snapshot interrupted by compaction %v", rev)
		ExitWithError(ExitInterrupted, err)
	}
	os.Stdout.Sync()
}
Example #12
func loadEtcdV3Config(client etcdv3.Client, config *server.Config) error {
	configPath := "/" + msg.PathPrefix + "/config"
	resp, err := client.Get(context.Background(), configPath)
	if err != nil {
		log.Printf("skydns: falling back to default configuration, could not read from etcd: %s", err)
		return nil
	}
	for _, ev := range resp.Kvs {
		if err := json.Unmarshal([]byte(ev.Value), config); err != nil {
			return fmt.Errorf("failed to unmarshal config: %s", err.Error())
		}
	}
	return nil
}
Example #13
// NewSession gets the leased session for a client.
func NewSession(client *v3.Client, opts ...SessionOption) (*Session, error) {
	ops := &sessionOptions{ttl: defaultSessionTTL}
	for _, opt := range opts {
		opt(ops)
	}

	resp, err := client.Grant(client.Ctx(), int64(ops.ttl))
	if err != nil {
		return nil, err
	}
	id := v3.LeaseID(resp.ID)

	ctx, cancel := context.WithCancel(client.Ctx())
	keepAlive, err := client.KeepAlive(ctx, id)
	if err != nil || keepAlive == nil {
		return nil, err
	}

	donec := make(chan struct{})
	s := &Session{client: client, id: id, cancel: cancel, donec: donec}

	// keep the lease alive until client error or cancelled context
	go func() {
		defer close(donec)
		for range keepAlive {
			// eat messages until keep alive channel closes
		}
	}()

	return s, nil
}
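The SessionOption type is not shown above. Assuming it is a functional option over sessionOptions (an assumption based on how the options are applied in the loop), a TTL option could look like the sketch below; the real concurrency package exposes a similar concurrency.WithTTL.

// WithTTL is a hypothetical option constructor, assuming SessionOption is
// defined as func(*sessionOptions).
func WithTTL(ttl int) SessionOption {
	return func(so *sessionOptions) { so.ttl = ttl }
}

// usage sketch:
//   s, err := NewSession(cli, WithTTL(30))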
Example #14
// compact compacts the etcd store and returns the current revision.
// If the current revision cannot be fetched, the old rev is returned.
func compact(ctx context.Context, client *clientv3.Client, oldRev int64) (int64, error) {
	resp, err := client.Get(ctx, "/")
	if err != nil {
		return oldRev, err
	}
	curRev := resp.Header.Revision
	if oldRev == 0 {
		return curRev, nil
	}
	_, err = client.Compact(ctx, oldRev)
	if err != nil {
		return curRev, err
	}
	return curRev, nil
}
Example #15
func waitDelete(ctx context.Context, client *v3.Client, key string, rev int64) error {
	cctx, cancel := context.WithCancel(ctx)
	defer cancel()
	wch := client.Watch(cctx, key, v3.WithRev(rev))
	for wr := range wch {
		for _, ev := range wr.Events {
			if ev.Type == storagepb.DELETE {
				return nil
			}
		}
	}
	if err := ctx.Err(); err != nil {
		return err
	}
	return fmt.Errorf("lost watcher waiting for delete")
}
Example #16
func doSerializedGet(ctx context.Context, client *v3.Client, results chan result) {
	for {
		st := time.Now()
		_, err := client.Get(ctx, "abc", v3.WithSerializable())
		if ctx.Err() != nil {
			break
		}
		var errStr string
		if err != nil {
			errStr = err.Error()
		}
		res := result{errStr: errStr, duration: time.Since(st), happened: time.Now()}
		results <- res
	}
	close(results)
}
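A hedged driver sketch for doSerializedGet: run the loop for a fixed window, then count how many serialized reads failed. The wrapper name and the window are illustrative only.

// Hypothetical driver: run doSerializedGet for duration d and count failures.
func runSerializedGets(client *v3.Client, d time.Duration) int {
	ctx, cancel := context.WithTimeout(context.Background(), d)
	defer cancel()
	results := make(chan result)
	go doSerializedGet(ctx, client, results)
	failed := 0
	for r := range results {
		if r.errStr != "" {
			failed++
		}
	}
	return failed
}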
Example #17
func toGRPC(c *clientv3.Client) grpcAPI {
	return grpcAPI{
		pb.NewClusterClient(c.ActiveConnection()),
		pb.NewKVClient(c.ActiveConnection()),
		pb.NewLeaseClient(c.ActiveConnection()),
		pb.NewWatchClient(c.ActiveConnection()),
		pb.NewMaintenanceClient(c.ActiveConnection()),
	}
}
Example #18
func NewWatchProxy(c *clientv3.Client) pb.WatchServer {
	wp := &watchProxy{
		cw: c.Watcher,
		wgs: watchergroups{
			cw:        c.Watcher,
			groups:    make(map[watchRange]*watcherGroup),
			idToGroup: make(map[receiverID]*watcherGroup),
			proxyCtx:  c.Ctx(),
		},
		ctx: c.Ctx(),
	}
	go func() {
		<-wp.ctx.Done()
		wp.wgs.stop()
	}()
	return wp
}
Example #19
func lockUntilSignal(c *clientv3.Client, lockname string) error {
	s, err := concurrency.NewSession(c)
	if err != nil {
		return err
	}

	m := concurrency.NewMutex(s, lockname)
	ctx, cancel := context.WithCancel(context.TODO())

	// unlock in case of ordinary shutdown
	donec := make(chan struct{})
	sigc := make(chan os.Signal, 1)
	signal.Notify(sigc, os.Interrupt, os.Kill)
	go func() {
		<-sigc
		cancel()
		close(donec)
	}()

	if err := m.Lock(ctx); err != nil {
		return err
	}

	k, kerr := c.Get(ctx, m.Key())
	if kerr != nil {
		return kerr
	}
	if len(k.Kvs) == 0 {
		return errors.New("lock lost on init")
	}

	display.Get(*k)

	select {
	case <-donec:
		return m.Unlock(context.TODO())
	case <-s.Done():
	}

	return errors.New("session expired")
}
Example #20
func getLeader(etcdClient *clientv3.Client, path string) (string, int64, error) {
	kv := clientv3.NewKV(etcdClient)
	ctx, cancel := context.WithTimeout(etcdClient.Ctx(), requestTimeout)
	resp, err := kv.Get(ctx, path)
	cancel()
	if err != nil {
		return "", 0, errors.Trace(err)
	}
	if len(resp.Kvs) != 1 {
		return "", 0, errors.Errorf("invalid getLeader resp: %v", resp)
	}

	var leader pdpb.Leader
	if err = leader.Unmarshal(resp.Kvs[0].Value); err != nil {
		return "", 0, errors.Trace(err)
	}
	return leader.GetAddr(), resp.Header.Revision, nil
}
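A common follow-up, shown here only as a sketch, is to watch the leader key starting just after the revision getLeader returned, so nothing between the Get and the Watch is missed. The wrapper name and the logging are illustrative.

// Hypothetical continuation: react to later changes of the leader key.
func watchLeader(etcdClient *clientv3.Client, path string) error {
	addr, rev, err := getLeader(etcdClient, path)
	if err != nil {
		return err
	}
	log.Printf("current leader: %s", addr)
	wch := etcdClient.Watch(etcdClient.Ctx(), path, clientv3.WithRev(rev+1))
	for resp := range wch {
		if err := resp.Err(); err != nil {
			return err
		}
		for _, ev := range resp.Events {
			log.Printf("leader key %s: %q", ev.Type, ev.Kv.Value)
		}
	}
	return nil
}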
Example #21
// StartCompactor starts a compactor in the background in order to compact keys
// older than a fixed time.
// We need to compact keys because we can't let on-disk data grow forever.
// We keep the most recent 10 minutes of data. It should be enough for slow watchers and to tolerate bursts.
// TODO: We might keep a longer history (12h) in the future once storage API can take
//       advantage of multi-version key.
func StartCompactor(ctx context.Context, client *clientv3.Client) {
	endpointsMapMu.Lock()
	defer endpointsMapMu.Unlock()

	// We can't have multiple compaction jobs for the same cluster.
	// Currently we rely on endpoints to differentiate clusters.
	var emptyStruct struct{}
	for _, ep := range client.Endpoints() {
		if _, ok := endpointsMap[ep]; ok {
			glog.V(4).Infof("compactor already exists for endpoints %v", client.Endpoints())
			return
		}
	}
	for _, ep := range client.Endpoints() {
		endpointsMap[ep] = emptyStruct
	}

	go compactor(ctx, client, compactInterval)
}
Example #22
func prepareObjs(ctx context.Context, e *event, client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner) (curObj runtime.Object, oldObj runtime.Object, err error) {
	if !e.isDeleted {
		curObj, err = decodeObj(codec, versioner, e.value, e.rev)
		if err != nil {
			return nil, nil, err
		}
	}
	if e.isDeleted || !e.isCreated {
		getResp, err := client.Get(ctx, e.key, clientv3.WithRev(e.rev-1))
		if err != nil {
			return nil, nil, err
		}
		oldObj, err = decodeObj(codec, versioner, getResp.Kvs[0].Value, getResp.Kvs[0].ModRevision)
		if err != nil {
			return nil, nil, err
		}
	}
	return curObj, oldObj, nil
}
Example #23
func getWatchChan(c *clientv3.Client, args []string) (clientv3.WatchChan, error) {
	if len(args) < 1 || len(args) > 2 {
		return nil, fmt.Errorf("bad number of arguments")
	}
	key := args[0]
	opts := []clientv3.OpOption{clientv3.WithRev(watchRev)}
	if len(args) == 2 {
		if watchPrefix {
			return nil, fmt.Errorf("`range_end` and `--prefix` are mutually exclusive")
		}
		opts = append(opts, clientv3.WithRange(args[1]))
	}
	if watchPrefix {
		opts = append(opts, clientv3.WithPrefix())
	}
	if watchPrevKey {
		opts = append(opts, clientv3.WithPrevKV())
	}
	return c.Watch(context.TODO(), key, opts...), nil
}
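Draining the returned channel might look like the sketch below; the real command formats output through its display layer, and the wrapper name here is illustrative.

// Hypothetical consumer of the channel returned by getWatchChan.
func printWatch(c *clientv3.Client, args []string) error {
	wch, err := getWatchChan(c, args)
	if err != nil {
		return err
	}
	for resp := range wch {
		if err := resp.Err(); err != nil {
			return err
		}
		for _, ev := range resp.Events {
			fmt.Printf("%s %q : %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value)
		}
	}
	return nil
}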
Example #24
func campaign(c *clientv3.Client, election string, prop string) error {
	s, err := concurrency.NewSession(c)
	if err != nil {
		return err
	}
	e := concurrency.NewElection(s, election)
	ctx, cancel := context.WithCancel(context.TODO())

	donec := make(chan struct{})
	sigc := make(chan os.Signal, 1)
	signal.Notify(sigc, os.Interrupt, os.Kill)
	go func() {
		<-sigc
		cancel()
		close(donec)
	}()

	if err = e.Campaign(ctx, prop); err != nil {
		return err
	}

	// print key since elected
	resp, err := c.Get(ctx, e.Key())
	if err != nil {
		return err
	}
	display.Get(*resp)

	select {
	case <-donec:
	case <-s.Done():
		return errors.New("elect: session expired")
	}

	return e.Resign(context.TODO())
}
Example #25
func putKeyAtMostOnce(ctx context.Context, client *clientv3.Client, key string) error {
	gr, err := getKey(ctx, client, key)
	if err != nil {
		return err
	}

	var modrev int64
	if len(gr.Kvs) > 0 {
		modrev = gr.Kvs[0].ModRevision
	}

	for ctx.Err() == nil {
		_, err := client.Txn(ctx).If(clientv3.Compare(clientv3.ModRevision(key), "=", modrev)).Then(clientv3.OpPut(key, key)).Commit()

		if err == nil {
			return nil
		}
	}

	return ctx.Err()
}
Example #26
func etcdMon_v3(prefix string, c3 *clientv3.Client, bus chan fileChangeEvent, startRevision int64) {
	key, option := prefixToKeyOption(prefix)
	ch := c3.Watch(context.Background(), key, option, clientv3.WithRev(startRevision))
	for chEvent := range ch {
		for _, event := range chEvent.Events {
			fileEvent := fileChangeEvent{
				Path:    string(event.Kv.Key),
				Content: event.Kv.Value,
			}
			switch event.Type {
			case mvccpb.PUT:
				bus <- fileEvent
			case mvccpb.DELETE:
				fileEvent.IsRemoved = true
				bus <- fileEvent
			default:
				log.Println("etcdMon_v3 undefined event type: ", event.Type)
			}
		}
	}
	close(bus)
}
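A hedged wiring sketch combining this watcher with firstSyncEtcDir_v3 from Example #3: sync the local directory first, then watch from the next revision so the already-synced state is not replayed. The wrapper and the event consumer are illustrative; the original project's consumer is not shown here.

// Hypothetical wiring of Examples #3 and #26.
func runEtcdDirSync(prefix string, client *clientv3.Client, localdir string) {
	rev := firstSyncEtcDir_v3(prefix, client, localdir)
	bus := make(chan fileChangeEvent)
	go etcdMon_v3(prefix, client, bus, rev+1)
	for ev := range bus {
		// apply ev to localdir here (write or remove the corresponding file)
		_ = ev
	}
}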
Example #27
// NewSession gets the leased session for a client.
func NewSession(client *v3.Client) (*Session, error) {
	clientSessions.mu.Lock()
	defer clientSessions.mu.Unlock()
	if s, ok := clientSessions.sessions[client]; ok {
		return s, nil
	}

	resp, err := client.Create(context.TODO(), sessionTTL)
	if err != nil {
		return nil, err
	}
	id := lease.LeaseID(resp.ID)

	ctx, cancel := context.WithCancel(context.Background())
	keepAlive, err := client.KeepAlive(ctx, id)
	if err != nil || keepAlive == nil {
		return nil, err
	}

	donec := make(chan struct{})
	s := &Session{client: client, id: id, cancel: cancel, donec: donec}
	clientSessions.sessions[client] = s

	// keep the lease alive until client error or cancelled context
	go func() {
		defer func() {
			clientSessions.mu.Lock()
			delete(clientSessions.sessions, client)
			clientSessions.mu.Unlock()
			close(donec)
		}()
		for range keepAlive {
			// eat messages until keep alive channel closes
		}
	}()

	return s, nil
}
Example #28
func prepareObjs(ctx context.Context, e *event, client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner) (curObj runtime.Object, oldObj runtime.Object, err error) {
	if !e.isDeleted {
		curObj, err = decodeObj(codec, versioner, e.value, e.rev)
		if err != nil {
			return nil, nil, err
		}
	}
	if e.isDeleted || !e.isCreated {
		getResp, err := client.Get(ctx, e.key, clientv3.WithRev(e.rev-1))
		if err != nil {
			return nil, nil, err
		}
		// Note that this sends the *old* object with the etcd revision for the time at
		// which it gets deleted.
		// We assume the old object is returned only in a Deleted event. Users (e.g. cacher) need
		// a revision larger than the previous one to tell the ordering.
		oldObj, err = decodeObj(codec, versioner, getResp.Kvs[0].Value, e.rev)
		if err != nil {
			return nil, nil, err
		}
	}
	return curObj, oldObj, nil
}
Example #29
func toGRPC(c *clientv3.Client) grpcAPI {
	if v, ok := proxies[c]; ok {
		return v
	}
	return grpcAPI{
		pb.NewClusterClient(c.ActiveConnection()),
		grpcproxy.KvServerToKvClient(grpcproxy.NewKvProxy(c)),
		pb.NewLeaseClient(c.ActiveConnection()),
		grpcproxy.WatchServerToWatchClient(grpcproxy.NewWatchProxy(c)),
		pb.NewMaintenanceClient(c.ActiveConnection()),
	}
}
Example #30
func syncProcess_v3FSEvent(localDir string, serverPrefix string, c3 *clientv3.Client, event fileChangeEvent) {
	etcdPath, err := filepath.Rel(localDir, event.Path)
	if err != nil {
		log.Printf("syncProcess_v3 error get relpath '%v': %v\n", event.Path, err)
		return
	}
	etcdPath = serverPrefix + etcdPath
	etcdPath = strings.Replace(etcdPath, string(os.PathSeparator), "/", -1)

	switch {
	case event.IsRemoved:
		_, err := c3.Delete(context.Background(), etcdPath)
		if err != nil {
			log.Printf("syncProcess_v3 error while delete etcdkey '%v': %v\n", etcdPath, err)
		}
	case event.IsDir:
		files, _ := ioutil.ReadDir(event.Path)
		for _, file := range files {
			path := filepath.Join(event.Path, file.Name())
			content := []byte(nil)
			if !file.IsDir() {
				content, err = ioutil.ReadFile(path)
				if err != nil {
					log.Println(err)
				}
			}
			syncProcess_v3FSEvent(localDir, serverPrefix, c3, fileChangeEvent{
				Path:      path,
				IsDir:     file.IsDir(),
				IsRemoved: false,
				Content:   content,
			})
		}
	case !event.IsDir:
		resp, err := c3.Get(context.Background(), etcdPath)
		if err != nil {
			log.Printf("syncProcess_v3 Can't read key '%v': %v\n", etcdPath, err)
			return
		}
		if len(resp.Kvs) > 0 {
			if bytes.Equal(resp.Kvs[0].Value, event.Content) {
				return
			}
		}
		_, err = c3.Put(context.Background(), etcdPath, string(event.Content))
		if err != nil {
			log.Printf("syncProcess_v3 error while put etcdkey '%v': %v\n", etcdPath, err)
		}
	}
}