Example #1
// compact compacts the etcd store and returns the current revision.
// It returns the current compact time and global revision if no error occurred.
// Note that a CAS failure does not incur an error.
func compact(ctx context.Context, client *clientv3.Client, t, rev int64) (int64, int64, error) {
	resp, err := client.KV.Txn(ctx).If(
		clientv3.Compare(clientv3.Version(compactRevKey), "=", t),
	).Then(
		clientv3.OpPut(compactRevKey, strconv.FormatInt(rev, 10)), // Expect side effect: increment Version
	).Else(
		clientv3.OpGet(compactRevKey),
	).Commit()
	if err != nil {
		return t, rev, err
	}

	curRev := resp.Header.Revision

	if !resp.Succeeded {
		curTime := resp.Responses[0].GetResponseRange().Kvs[0].Version
		return curTime, curRev, nil
	}
	curTime := t + 1

	if rev == 0 {
		// We don't compact on bootstrap.
		return curTime, curRev, nil
	}
	if _, err = client.Compact(ctx, rev); err != nil {
		return curTime, curRev, err
	}
	glog.Infof("etcd: compacted rev (%d), endpoints (%v)", rev, client.Endpoints())
	return curTime, curRev, nil
}
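
This example refers to identifiers defined elsewhere in its package. Below is a minimal sketch of what the surrounding declarations might look like; the package name, import paths, key string, and interval value are assumptions, and only the identifier names are taken from the examples.

package etcd3 // assumed package name

// Imports used by the examples in this file.
import (
	"context"
	"strconv"
	"time"

	"github.com/coreos/etcd/clientv3" // may be go.etcd.io/etcd/client/v3 in newer client versions
	"github.com/golang/glog"
)

const (
	// compactRevKey holds the last compacted revision as a string; its Version serves
	// as the logical compact time. The concrete key string is an assumption.
	compactRevKey = "compact_rev_key"

	// compactInterval is how often compaction runs. Ten minutes matches the comments
	// in the examples, but the actual default is an assumption here.
	compactInterval = 10 * time.Minute
)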
Example #2
// compactor periodically compacts historical versions of keys in etcd.
// It compacts key versions older than the given interval.
// In other words, after compaction, the store only contains key versions set during the last interval.
// Any API call for older versions of keys will return an error.
// interval is the time between compactions; the first compaction happens after "interval".
func compactor(ctx context.Context, client *clientv3.Client, interval time.Duration) {
	// Technical definitions:
	// We have a special key in etcd defined as *compactRevKey*.
	// compactRevKey's value is set to the string form of the last compacted revision.
	// compactRevKey's version is used as the logical time for comparison; it is referred to as the compact time
	// (see the sketch after this example).
	// Initially, because the key doesn't exist, the compact time (version) is 0.
	//
	// Algorithm:
	// - Compare to see if (local compact_time) = (remote compact_time).
	// - If yes, increment both local and remote compact_time, and do a compaction.
	// - If not, set local to remote compact_time.
	//
	// Technical details/insights:
	//
	// The protocol here is lease based. If one compactor succeeds in the CAS, the others learn about it when their own
	// CAS fails later, and they try again after 10 minutes. If an apiserver crashes, another one "takes over" the lease.
	//
	// For example, in the following diagram, compactor C1 performs compactions at t1 and t2. Another compactor C2
	// at t1' (t1 < t1' < t2) would fail its CAS, set its known oldRev to the rev at t1, and try again at t2' (t2' > t2).
	// If C1 crashed and did not compact at t2, C2 would succeed in its CAS at t2'.
	//
	//                   oldRev(t2)   curRev(t2)
	//                                  +
	//   oldRev        curRev           |
	//     +             +              |
	//     |             |              |
	//     |             |    t1'       |     t2'
	// +---v-------------v----^---------v------^---->
	//     t0           t1             t2
	//
	// We have the following guarantees:
	// - in the normal case, the compaction interval is 10 minutes;
	// - in a failover, the interval is >10m and <20m.
	//
	// FAQ:
	// - What if time is not accurate? We don't care, as long as someone performs the compaction; atomicity is
	//   ensured by the etcd API.
	// - What happens under heavy load? Each apiserver performs only one compaction every 10 minutes, which is
	//   very unlikely to affect, or be affected by, server load.

	var compactTime int64
	var rev int64
	var err error
	for {
		select {
		case <-time.After(interval):
		case <-ctx.Done():
			return
		}

		compactTime, rev, err = compact(ctx, client, compactTime, rev)
		if err != nil {
			glog.Errorf("etcd: endpoint (%v) compact failed: %v", client.Endpoints(), err)
			continue
		}
	}
}
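
The comments above lean on the fact that a key's Version acts as a logical clock: it is 1 after the first Put and increments on every subsequent Put, which is why it can stand in for the compact time. The following is a small, hedged sketch (not part of the original code) that makes this observable, reusing compactRevKey and a client as in the examples.

// demoCompactTime is an illustrative helper, not part of the original code.
// It shows that compactRevKey's Version increments on every Put, which is the
// logical clock the compactor's CAS compares against.
func demoCompactTime(ctx context.Context, client *clientv3.Client) error {
	for i := 0; i < 3; i++ {
		if _, err := client.Put(ctx, compactRevKey, "0"); err != nil {
			return err
		}
		resp, err := client.Get(ctx, compactRevKey)
		if err != nil {
			return err
		}
		// Version is 1 after the first Put, 2 after the second, and so on.
		glog.Infof("compact time (Version) of %q is now %d", compactRevKey, resp.Kvs[0].Version)
	}
	return nil
}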
Example #3
// compact compacts the etcd store and returns the current revision.
// If the current revision cannot be obtained, the old revision is returned.
func compact(ctx context.Context, client *clientv3.Client, oldRev int64) (int64, error) {
	resp, err := client.Get(ctx, "/")
	if err != nil {
		return oldRev, err
	}
	curRev := resp.Header.Revision
	if oldRev == 0 {
		return curRev, nil
	}
	if _, err := client.Compact(ctx, oldRev); err != nil {
		return curRev, err
	}
	glog.Infof("etcd: Compacted rev %d, endpoints %v", oldRev, client.Endpoints())
	return curRev, nil
}
Example #4
// StartCompactor starts a compactor in the background to compact key versions
// older than a fixed time.
// We need to compact keys because we can't let the on-disk data grow forever.
// We keep the most recent 10 minutes of data, which should be enough for slow watchers and to tolerate bursts.
// TODO: We might keep a longer history (12h) in the future once the storage API can take
//       advantage of multi-version keys.
func StartCompactor(ctx context.Context, client *clientv3.Client) {
	endpointsMapMu.Lock()
	defer endpointsMapMu.Unlock()

	// We can't have multiple compaction jobs for the same cluster.
	// Currently we rely on endpoints to differentiate clusters.
	var emptyStruct struct{}
	for _, ep := range client.Endpoints() {
		if _, ok := endpointsMap[ep]; ok {
			glog.V(4).Infof("compactor already exists for endpoints %v", client.Endpoints())
			return
		}
	}
	for _, ep := range client.Endpoints() {
		endpointsMap[ep] = emptyStruct
	}

	go compactor(ctx, client, compactInterval)
}
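
Both StartCompactor variants rely on package-level bookkeeping that is not shown here. Below is a hedged sketch of what it might look like, with the types inferred from how the identifiers are used; it requires "sync" from the standard library.

var (
	// endpointsMapMu guards endpointsMap.
	endpointsMapMu sync.Mutex

	// endpointsMap records the endpoints that already have a compactor running,
	// so that only one compactor per cluster is started within this process.
	endpointsMap = make(map[string]struct{})
)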
Example #5
// StartCompactor starts a compactor in the background to compact old versions of keys that are no longer needed.
// By default, we keep the most recent 10 minutes of data and compact versions older than 10 minutes.
// That should be enough for slow watchers and to tolerate bursts.
// TODO: We might keep a longer history (12h) in the future once the storage API can take advantage of past versions of keys.
func StartCompactor(ctx context.Context, client *clientv3.Client) {
	endpointsMapMu.Lock()
	defer endpointsMapMu.Unlock()

	// Within one process, we can have only one compactor per cluster.
	// Currently we rely on endpoints to differentiate clusters.
	for _, ep := range client.Endpoints() {
		if _, ok := endpointsMap[ep]; ok {
			glog.V(4).Infof("compactor already exists for endpoints %v", client.Endpoints())
			return
		}
	}
	for _, ep := range client.Endpoints() {
		endpointsMap[ep] = struct{}{}
	}

	go compactor(ctx, client, compactInterval)
}
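
As a hedged usage sketch, StartCompactor would typically be called once when the etcd3 storage backend is created, with a context that is cancelled on shutdown. The wrapper function below, its name, and its error handling are illustrative assumptions, not part of the original code.

// newETCD3Storage is an illustrative wrapper, not part of the original code.
func newETCD3Storage(endpoints []string) (*clientv3.Client, func(), error) {
	client, err := clientv3.New(clientv3.Config{Endpoints: endpoints})
	if err != nil {
		return nil, nil, err
	}
	ctx, cancel := context.WithCancel(context.Background())
	// StartCompactor deduplicates by endpoint, so calling it again for the same
	// cluster within this process is a no-op.
	StartCompactor(ctx, client)
	stop := func() {
		cancel()
		_ = client.Close()
	}
	return client, stop, nil
}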