// compact compacts etcd store and returns current rev. // It will return the current compact time and global revision if no error occurred. // Note that CAS fail will not incur any error. func compact(ctx context.Context, client *clientv3.Client, t, rev int64) (int64, int64, error) { resp, err := client.KV.Txn(ctx).If( clientv3.Compare(clientv3.Version(compactRevKey), "=", t), ).Then( clientv3.OpPut(compactRevKey, strconv.FormatInt(rev, 10)), // Expect side effect: increment Version ).Else( clientv3.OpGet(compactRevKey), ).Commit() if err != nil { return t, rev, err } curRev := resp.Header.Revision if !resp.Succeeded { curTime := resp.Responses[0].GetResponseRange().Kvs[0].Version return curTime, curRev, nil } curTime := t + 1 if rev == 0 { // We don't compact on bootstrap. return curTime, curRev, nil } if _, err = client.Compact(ctx, rev); err != nil { return curTime, curRev, err } glog.Infof("etcd: compacted rev (%d), endpoints (%v)", rev, client.Endpoints()) return curTime, curRev, nil }
func parseCompare(line string) (*clientv3.Cmp, error) { var ( key string op string val string ) lparenSplit := strings.SplitN(line, "(", 2) if len(lparenSplit) != 2 { return nil, fmt.Errorf("malformed comparison: %s", line) } target := lparenSplit[0] n, serr := fmt.Sscanf(lparenSplit[1], "%q) %s %q", &key, &op, &val) if n != 3 { return nil, fmt.Errorf("malformed comparison: %s; got %s(%q) %s %q", line, target, key, op, val) } if serr != nil { return nil, fmt.Errorf("malformed comparison: %s (%v)", line, serr) } var ( v int64 err error cmp clientv3.Cmp ) switch target { case "ver", "version": if v, err = strconv.ParseInt(val, 10, 64); err == nil { cmp = clientv3.Compare(clientv3.Version(key), op, v) } case "c", "create": if v, err = strconv.ParseInt(val, 10, 64); err == nil { cmp = clientv3.Compare(clientv3.CreateRevision(key), op, v) } case "m", "mod": if v, err = strconv.ParseInt(val, 10, 64); err == nil { cmp = clientv3.Compare(clientv3.ModRevision(key), op, v) } case "val", "value": cmp = clientv3.Compare(clientv3.Value(key), op, val) default: return nil, fmt.Errorf("malformed comparison: %s (unknown target %s)", line, target) } if err != nil { return nil, fmt.Errorf("invalid txn compare request: %s", line) } return &cmp, nil }
func isKeyCurrent(k string, r *v3.GetResponse) v3.Cmp { rev := r.Header.Revision + 1 if len(r.Kvs) != 0 { rev = r.Kvs[0].ModRevision + 1 } return v3.Compare(v3.ModifiedRevision(k), "<", rev) }
func (c *etcdCtx) SetRing(ring torus.Ring) error { oldr, etcdver, err := c.getRing() if err != nil { return err } if oldr.Version() != ring.Version()-1 { return torus.ErrNonSequentialRing } b, err := ring.Marshal() if err != nil { return err } key := MkKey("meta", "the-one-ring") txn := c.etcd.Client.Txn(c.getContext()).If( etcdv3.Compare(etcdv3.Version(key), "=", etcdver), ).Then( etcdv3.OpPut(key, string(b)), ) resp, err := txn.Commit() if err != nil { return err } if resp.Succeeded { return nil } return torus.ErrAgain }
// Lock locks the mutex with a cancellable context. If the context is cancelled // while trying to acquire the lock, the mutex tries to clean its stale lock entry. func (m *Mutex) Lock(ctx context.Context) error { s, serr := NewSession(m.client) if serr != nil { return serr } m.myKey = fmt.Sprintf("%s/%x", m.pfx, s.Lease()) cmp := v3.Compare(v3.CreateRevision(m.myKey), "=", 0) // put self in lock waiters via myKey; oldest waiter holds lock put := v3.OpPut(m.myKey, "", v3.WithLease(s.Lease())) // reuse key in case this session already holds the lock get := v3.OpGet(m.myKey) resp, err := m.client.Txn(ctx).If(cmp).Then(put).Else(get).Commit() if err != nil { return err } m.myRev = resp.Header.Revision if !resp.Succeeded { m.myRev = resp.Responses[0].GetResponseRange().Kvs[0].CreateRevision } // wait for deletion revisions prior to myKey err = waitDeletes(ctx, m.client, m.pfx, v3.WithPrefix(), v3.WithRev(m.myRev-1)) // release lock key if cancelled select { case <-ctx.Done(): m.Unlock() default: } return err }
func (b *blockEtcd) CreateBlockVolume(volume *models.Volume) error { new, err := b.AtomicModifyKey([]byte(etcd.MkKey("meta", "volumeminter")), etcd.BytesAddOne) volume.Id = new.(uint64) if err != nil { return err } vbytes, err := volume.Marshal() if err != nil { return err } inodeBytes := torus.NewINodeRef(torus.VolumeID(volume.Id), 1).ToBytes() do := b.Etcd.Client.Txn(b.getContext()).If( etcdv3.Compare(etcdv3.Version(etcd.MkKey("volumes", volume.Name)), "=", 0), ).Then( etcdv3.OpPut(etcd.MkKey("volumes", volume.Name), string(etcd.Uint64ToBytes(volume.Id))), etcdv3.OpPut(etcd.MkKey("volumeid", etcd.Uint64ToHex(volume.Id)), string(vbytes)), etcdv3.OpPut(etcd.MkKey("volumemeta", etcd.Uint64ToHex(volume.Id), "inode"), string(etcd.Uint64ToBytes(1))), etcdv3.OpPut(etcd.MkKey("volumemeta", etcd.Uint64ToHex(volume.Id), "blockinode"), string(inodeBytes)), ) resp, err := do.Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrExists } return nil }
func (s *store) conditionalDelete(ctx context.Context, key string, out runtime.Object, v reflect.Value, precondtions *storage.Preconditions) error { getResp, err := s.client.KV.Get(ctx, key) if err != nil { return err } for { origState, err := s.getState(getResp, key, v, false) if err != nil { return err } if err := checkPreconditions(key, precondtions, origState.obj); err != nil { return err } txnResp, err := s.client.KV.Txn(ctx).If( clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev), ).Then( clientv3.OpDelete(key), ).Else( clientv3.OpGet(key), ).Commit() if err != nil { return err } if !txnResp.Succeeded { getResp = (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange()) glog.V(4).Infof("deletion of %s failed because of a conflict, going to retry", key) continue } return decode(s.codec, s.versioner, origState.data, out, origState.rev) } }
// GuaranteedUpdate implements storage.Interface.GuaranteedUpdate. func (s *store) GuaranteedUpdate(ctx context.Context, key string, out runtime.Object, ignoreNotFound bool, precondtions *storage.Preconditions, tryUpdate storage.UpdateFunc) error { v, err := conversion.EnforcePtr(out) if err != nil { panic("unable to convert output object to pointer") } key = keyWithPrefix(s.pathPrefix, key) getResp, err := s.client.KV.Get(ctx, key) if err != nil { return err } for { origState, err := s.getState(getResp, key, v, ignoreNotFound) if err != nil { return err } if err := checkPreconditions(key, precondtions, origState.obj); err != nil { return err } ret, ttl, err := s.updateState(origState, tryUpdate) if err != nil { return err } data, err := runtime.Encode(s.codec, ret) if err != nil { return err } if bytes.Equal(data, origState.data) { return decode(s.codec, s.versioner, origState.data, out, origState.rev) } opts, err := s.ttlOpts(ctx, int64(ttl)) if err != nil { return err } txnResp, err := s.client.KV.Txn(ctx).If( clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev), ).Then( clientv3.OpPut(key, string(data), opts...), ).Else( clientv3.OpGet(key), ).Commit() if err != nil { return err } if !txnResp.Succeeded { getResp = (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange()) glog.V(4).Infof("GuaranteedUpdate of %s failed because of a conflict, going to retry", key) continue } putResp := txnResp.Responses[0].GetResponsePut() return decode(s.codec, s.versioner, data, out, putResp.Header.Revision) } }
func (b *blockEtcd) Unlock() error { vid := uint64(b.vid) k := etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid), "blocklock") tx := b.Etcd.Client.Txn(b.getContext()).If( etcdv3.Compare(etcdv3.Version(k), ">", 0), etcdv3.Compare(etcdv3.Value(k), "=", b.Etcd.UUID()), ).Then( etcdv3.OpDelete(etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid), "blocklock")), ) resp, err := tx.Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrLocked } return nil }
func (b *blockEtcd) SaveSnapshot(name string) error { vid := uint64(b.vid) for { sshotKey := etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid), "snapshots", name) inoKey := etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid), "blockinode") tx := b.Etcd.Client.Txn(b.getContext()).If( etcdv3.Compare(etcdv3.Version(sshotKey), "=", 0), ).Then( etcdv3.OpGet(inoKey), ) resp, err := tx.Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrExists } v := resp.Responses[0].GetResponseRange().Kvs[0] inode := Snapshot{ Name: name, When: time.Now(), INodeRef: v.Value, } bytes, err := json.Marshal(inode) if err != nil { return err } tx = b.Etcd.Client.Txn(b.getContext()).If( etcdv3.Compare(etcdv3.Version(inoKey), "=", v.Version), ).Then( etcdv3.OpPut(sshotKey, string(bytes)), ) resp, err = tx.Commit() if err != nil { return err } if !resp.Succeeded { continue } return nil } }
func (b *blockEtcd) SyncINode(inode torus.INodeRef) error { vid := uint64(inode.Volume()) inodeBytes := string(inode.ToBytes()) k := etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid), "blocklock") tx := b.Etcd.Client.Txn(b.getContext()).If( etcdv3.Compare(etcdv3.Version(k), ">", 0), etcdv3.Compare(etcdv3.Value(k), "=", b.Etcd.UUID()), ).Then( etcdv3.OpPut(etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid), "blockinode"), inodeBytes), ) resp, err := tx.Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrLocked } return nil }
// deleteRevKey deletes a key by revision, returning false if key is missing func deleteRevKey(kv v3.KV, key string, rev int64) (bool, error) { cmp := v3.Compare(v3.ModRevision(key), "=", rev) req := v3.OpDelete(key) txnresp, err := kv.Txn(context.TODO()).If(cmp).Then(req).Commit() if err != nil { return false, err } else if !txnresp.Succeeded { return false, nil } return true, nil }
// putNewKV attempts to create the given key, only succeeding if the key did // not yet exist. func putNewKV(kv v3.KV, key, val string, leaseID v3.LeaseID) (int64, error) { cmp := v3.Compare(v3.Version(key), "=", 0) req := v3.OpPut(key, val, v3.WithLease(leaseID)) txnresp, err := kv.Txn(context.TODO()).If(cmp).Then(req).Commit() if err != nil { return 0, err } if !txnresp.Succeeded { return 0, ErrKeyExists } return txnresp.Header.Revision, nil }
// Lock acquires the lock stored at path in etcd on behalf of this process.
//
// If s already holds the lock for path it returns nil immediately. Otherwise
// it grants a lease with a TTL of masterTTL and writes path with the value
// s.processID, attached to that lease, only if the key has not been created
// yet. On success the lease ID is recorded in s.locks and a goroutine is
// started that keeps the lease alive for as long as s.HasLock(path) is true.
//
// When the lease grant or the transaction fails, or the key already exists,
// the behaviour depends on block: if block is false an error describing the
// failure is returned; if block is true the call sleeps for
// masterTTL * time.Second and tries again, so it only returns once the lock
// has been obtained.
//
// NOTE(review): a lease granted for an attempt that then fails is not revoked
// here; presumably it is left to expire after its TTL — confirm.
func (s *Sync) Lock(path string, block bool) error {
	for {
		if s.HasLock(path) {
			return nil
		}
		var err error
		lease, err := s.etcdClient.Grant(s.withTimeout(), masterTTL)
		var resp *etcd.TxnResponse
		// Only attempt the write when a lease was obtained.
		if err == nil {
			// CreateRevision == 0 means nobody has created the key yet.
			cmp := etcd.Compare(etcd.CreateRevision(path), "=", 0)
			put := etcd.OpPut(path, s.processID, etcd.WithLease(lease.ID))
			resp, err = s.etcdClient.Txn(s.withTimeout()).If(cmp).Then(put).Commit()
		}
		// Either a request failed or the key already exists.
		if err != nil || !resp.Succeeded {
			msg := fmt.Sprintf("failed to lock path %s", path)
			if err != nil {
				msg = fmt.Sprintf("failed to lock path %s: %s", path, err)
			}
			log.Notice(msg)
			s.locks.Remove(path)
			if !block {
				return errors.New(msg)
			}
			// Wait before the next attempt.
			time.Sleep(masterTTL * time.Second)
			continue
		}
		log.Info("Locked %s", path)
		s.locks.Set(path, lease.ID)
		// Refresh the lease in the background while the lock is held.
		go func() {
			// Whatever ends the goroutine, forget the lock locally.
			defer func() {
				log.Notice("releasing keepalive lock for %s", path)
				s.locks.Remove(path)
			}()
			for s.HasLock(path) {
				ch, err := s.etcdClient.KeepAlive(s.withTimeout(), lease.ID)
				if err != nil {
					log.Notice("failed to keepalive lock for %s %s", path, err)
					return
				}
				// Drain keepalive responses until the channel is closed, then
				// re-check HasLock and start another keepalive.
				for range ch {
				}
			}
		}()
		return nil
	}
}
func NewUniqueKV(ctx context.Context, kv v3.KV, pfx, val string, opts ...v3.OpOption) (string, int64, error) { for { newKey := fmt.Sprintf("%s/%v", pfx, time.Now().UnixNano()) put := v3.OpPut(newKey, val, opts...) cmp := v3.Compare(v3.ModifiedRevision(newKey), "=", 0) resp, err := kv.Txn(ctx).If(cmp).Then(put).Commit() if err != nil { return "", 0, err } if !resp.Succeeded { continue } return newKey, resp.Header.Revision, nil } }
func (b *blockEtcd) DeleteSnapshot(name string) error { vid := uint64(b.vid) k := etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid), "snapshots", name) tx := b.Etcd.Client.Txn(b.getContext()).If( etcdv3.Compare(etcdv3.Version(k), ">", 0), ).Then( etcdv3.OpDelete(k), ) resp, err := tx.Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrLocked } return nil }
// commit attempts to apply the txn's changes to the server. func (s *STM) commit() (ok bool, rr error) { // read set must not change cmps := make([]v3.Cmp, 0, len(s.rset)) for k, rk := range s.rset { // use < to support updating keys that don't exist yet cmp := v3.Compare(v3.ModifiedRevision(k), "<", rk.Revision()+1) cmps = append(cmps, cmp) } // apply all writes puts := make([]v3.Op, 0, len(s.wset)) for k, v := range s.wset { puts = append(puts, v3.OpPut(k, v)) } txnresp, err := s.client.Txn(context.TODO()).If(cmps...).Then(puts...).Commit() return txnresp.Succeeded, err }
// Proclaim lets the leader announce a new value without another election. func (e *Election) Proclaim(ctx context.Context, val string) error { if e.leaderSession == nil { return ErrElectionNotLeader } cmp := v3.Compare(v3.CreateRevision(e.leaderKey), "=", e.leaderRev) txn := e.client.Txn(ctx).If(cmp) txn = txn.Then(v3.OpPut(e.leaderKey, val, v3.WithLease(e.leaderSession.Lease()))) tresp, terr := txn.Commit() if terr != nil { return terr } if !tresp.Succeeded { e.leaderKey = "" return ErrElectionNotLeader } return nil }
func initEtcdMetadata(cfg torus.Config, gmd torus.GlobalMetadata, ringType torus.RingType) error { gmdbytes, err := json.Marshal(gmd) if err != nil { return err } emptyRing, err := ring.CreateRing(&models.Ring{ Type: uint32(ringType), Version: 1, ReplicationFactor: 2, }) if err != nil { return err } ringb, err := emptyRing.Marshal() if err != nil { return err } client, err := etcdv3.New(etcdv3.Config{Endpoints: []string{cfg.MetadataAddress}, TLS: cfg.TLS}) if err != nil { return err } defer client.Close() txn := client.Txn(context.Background()) resp, err := txn.If( etcdv3.Compare(etcdv3.Version(MkKey("meta", "globalmetadata")), "=", 0), ).Then( etcdv3.OpPut(MkKey("meta", "volumeminter"), string(Uint64ToBytes(1))), etcdv3.OpPut(MkKey("meta", "globalmetadata"), string(gmdbytes)), ).Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrExists } _, err = client.Put(context.Background(), MkKey("meta", "the-one-ring"), string(ringb)) if err != nil { return err } return nil }
func cas(key, value, newValue string, kvc clientv3.KV) error { ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) tnxRes, err := kvc.Txn(ctx). If(clientv3.Compare(clientv3.Value(key), "=", value)). Then(clientv3.OpPut(key, newValue)). Else(clientv3.OpGet(key)). Commit() cancel() if err != nil { return err } if tnxRes.Succeeded { return nil } log.Println(string(tnxRes.Responses[0].GetResponseRange().Kvs[0].Value)) return errors.New("release error") }
func (b *blockEtcd) DeleteVolume() error { vid := uint64(b.vid) tx := b.Etcd.Client.Txn(b.getContext()).If( etcdv3.Compare(etcdv3.Version(etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid), "blocklock")), "=", 0), ).Then( etcdv3.OpDelete(etcd.MkKey("volumes", b.name)), etcdv3.OpDelete(etcd.MkKey("volumeid", etcd.Uint64ToHex(vid))), etcdv3.OpDelete(etcd.MkKey("volumemeta", etcd.Uint64ToHex(vid)), etcdv3.WithPrefix()), ) resp, err := tx.Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrLocked } return nil }
func (b *blockEtcd) Lock(lease int64) error { if lease == 0 { return torus.ErrInvalid } k := etcd.MkKey("volumemeta", etcd.Uint64ToHex(uint64(b.vid)), "blocklock") tx := b.Etcd.Client.Txn(b.getContext()).If( etcdv3.Compare(etcdv3.Version(k), "=", 0), ).Then( etcdv3.OpPut(k, b.Etcd.UUID(), etcdv3.WithLease(etcdv3.LeaseID(lease))), ) resp, err := tx.Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrLocked } return nil }
func (c *etcdCtx) AtomicModifyKey(k []byte, f AtomicModifyFunc) (interface{}, error) { key := string(k) resp, err := c.etcd.Client.Get(c.getContext(), key) if err != nil { return nil, err } var version int64 var value []byte if len(resp.Kvs) != 1 { version = 0 value = []byte{} } else { kv := resp.Kvs[0] version = kv.Version value = kv.Value } for { newBytes, fval, err := f(value) if err != nil { return nil, err } txn := c.etcd.Client.Txn(c.getContext()).If( etcdv3.Compare(etcdv3.Version(key), "=", version), ).Then( etcdv3.OpPut(key, string(newBytes)), ).Else( etcdv3.OpGet(key), ) resp, err := txn.Commit() if err != nil { return nil, err } if resp.Succeeded { return fval, nil } promAtomicRetries.WithLabelValues(string(key)).Inc() kv := resp.Responses[0].GetResponseRange().Kvs[0] version = kv.Version value = kv.Value } }
// Campaign puts a value as eligible for the election. It blocks until
// it is elected, an error occurs, or the context is cancelled.
//
// A new session is created and the candidate key is derived from the election
// key prefix and the session lease. On return without error, e.leaderKey,
// e.leaderRev and e.leaderSession describe this candidate's key.
func (e *Election) Campaign(ctx context.Context, val string) error {
	s, serr := NewSession(e.client)
	if serr != nil {
		return serr
	}

	k := fmt.Sprintf("%s/%x", e.keyPrefix, s.Lease())
	// Create the candidate key if it has never been created; otherwise read it.
	txn := e.client.Txn(ctx).If(v3.Compare(v3.CreateRevision(k), "=", 0))
	txn = txn.Then(v3.OpPut(k, val, v3.WithLease(s.Lease())))
	txn = txn.Else(v3.OpGet(k))
	resp, err := txn.Commit()
	if err != nil {
		return err
	}

	// Record the candidate state first; Proclaim and Resign below rely on it.
	e.leaderKey, e.leaderRev, e.leaderSession = k, resp.Header.Revision, s
	if !resp.Succeeded {
		// The key already existed: keep its original create revision and
		// update the stored value only if it differs from val.
		kv := resp.Responses[0].GetResponseRange().Kvs[0]
		e.leaderRev = kv.CreateRevision
		if string(kv.Value) != val {
			if err = e.Proclaim(ctx, val); err != nil {
				e.Resign(ctx)
				return err
			}
		}
	}

	// Wait for the deletion of keys under the prefix at revisions before ours.
	err = waitDeletes(ctx, e.client, e.keyPrefix, v3.WithPrefix(), v3.WithRev(e.leaderRev-1))
	if err != nil {
		// clean up in case of context cancel
		select {
		case <-ctx.Done():
			// ctx is already done, so resign using the client's own context.
			e.Resign(e.client.Ctx())
		default:
			e.leaderSession = nil
		}
		return err
	}

	return nil
}
func putKeyAtMostOnce(ctx context.Context, client *clientv3.Client, key string) error { gr, err := getKey(ctx, client, key) if err != nil { return err } var modrev int64 if len(gr.Kvs) > 0 { modrev = gr.Kvs[0].ModRevision } for ctx.Err() == nil { _, err := client.Txn(ctx).If(clientv3.Compare(clientv3.ModRevision(key), "=", modrev)).Then(clientv3.OpPut(key, key)).Commit() if err == nil { return nil } } return ctx.Err() }
// newSequentialKV allocates a new sequential key <prefix>/nnnnn with a given // value and lease. Note: a bookkeeping node __<prefix> is also allocated. func newSequentialKV(kv v3.KV, prefix, val string, leaseID v3.LeaseID) (*RemoteKV, error) { resp, err := kv.Get(context.TODO(), prefix, v3.WithLastKey()...) if err != nil { return nil, err } // add 1 to last key, if any newSeqNum := 0 if len(resp.Kvs) != 0 { fields := strings.Split(string(resp.Kvs[0].Key), "/") _, serr := fmt.Sscanf(fields[len(fields)-1], "%d", &newSeqNum) if serr != nil { return nil, serr } newSeqNum++ } newKey := fmt.Sprintf("%s/%016d", prefix, newSeqNum) // base prefix key must be current (i.e., <=) with the server update; // the base key is important to avoid the following: // N1: LastKey() == 1, start txn. // N2: New Key 2, New Key 3, Delete Key 2 // N1: txn succeeds allocating key 2 when it shouldn't baseKey := "__" + prefix // current revision might contain modification so +1 cmp := v3.Compare(v3.ModRevision(baseKey), "<", resp.Header.Revision+1) reqPrefix := v3.OpPut(baseKey, "", v3.WithLease(leaseID)) reqNewKey := v3.OpPut(newKey, val, v3.WithLease(leaseID)) txn := kv.Txn(context.TODO()) txnresp, err := txn.If(cmp).Then(reqPrefix, reqNewKey).Commit() if err != nil { return nil, err } if !txnresp.Succeeded { return newSequentialKV(kv, prefix, val, leaseID) } return &RemoteKV{kv, newKey, txnresp.Header.Revision, val}, nil }
func (e *Etcd) getGlobalMetadata() error { txn := e.Client.Txn(context.Background()) resp, err := txn.If( etcdv3.Compare(etcdv3.Version(MkKey("meta", "globalmetadata")), ">", 0), ).Then( etcdv3.OpGet(MkKey("meta", "globalmetadata")), ).Commit() if err != nil { return err } if !resp.Succeeded { return torus.ErrNoGlobalMetadata } var gmd torus.GlobalMetadata err = json.Unmarshal(resp.Responses[0].GetResponseRange().Kvs[0].Value, &gmd) if err != nil { return err } e.global = gmd return nil }
func ExampleKV_txn() { cli, err := clientv3.New(clientv3.Config{ Endpoints: endpoints, DialTimeout: dialTimeout, }) if err != nil { log.Fatal(err) } defer cli.Close() kvc := clientv3.NewKV(cli) _, err = kvc.Put(context.TODO(), "key", "xyz") if err != nil { log.Fatal(err) } ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) _, err = kvc.Txn(ctx). If(clientv3.Compare(clientv3.Value("key"), ">", "abc")). // txn value comparisons are lexical Then(clientv3.OpPut("key", "XYZ")). // this runs, since 'xyz' > 'abc' Else(clientv3.OpPut("key", "ABC")). Commit() cancel() if err != nil { log.Fatal(err) } gresp, err := kvc.Get(context.TODO(), "key") cancel() if err != nil { log.Fatal(err) } for _, ev := range gresp.Kvs { fmt.Printf("%s : %s\n", ev.Key, ev.Value) } // key : XYZ }
// notFound returns a comparison that holds when key's mod revision is 0.
func notFound(key string) clientv3.Cmp {
	modRev := clientv3.ModRevision(key)
	return clientv3.Compare(modRev, "=", 0)
}
// IsOwner returns a comparison that holds when the create revision of the
// mutex's key equals the revision recorded by the mutex.
func (m *Mutex) IsOwner() v3.Cmp {
	createdAt := v3.CreateRevision(m.myKey)
	return v3.Compare(createdAt, "=", m.myRev)
}