// setAppliedIndex persists a new applied index. func setAppliedIndex(eng engine.Engine, raftID proto.RaftID, appliedIndex uint64) error { return engine.MVCCPut(eng, nil, /* stats */ keys.RaftAppliedIndexKey(raftID), proto.ZeroTimestamp, proto.Value{Bytes: encoding.EncodeUint64(nil, appliedIndex)}, nil /* txn */) }
func generateUniqueBytes(nodeID uint32) DBytes { // Unique bytes are composed of the current time in nanoseconds and the // node-id. If the nanosecond value is the same on two consecutive calls to // time.Now() the nanoseconds value is incremented. The node-id is varint // encoded. Since node-ids are allocated consecutively starting at 1, the // node-id field will consume 1 or 2 bytes for any reasonably sized cluster. // // TODO(pmattis): Do we have to worry about persisting the milliseconds value // periodically to avoid the clock ever going backwards (e.g. due to NTP // adjustment)? nanos := uint64(time.Now().UnixNano()) uniqueBytesState.Lock() if nanos <= uniqueBytesState.nanos { nanos = uniqueBytesState.nanos + 1 } uniqueBytesState.nanos = nanos uniqueBytesState.Unlock() b := make([]byte, 0, 8+binary.MaxVarintLen32) b = encoding.EncodeUint64(b, nanos) // We use binary.PutUvarint instead of encoding.EncodeUvarint because the // former uses less space for values < 128 which is a common occurrence for // node IDs. n := binary.PutUvarint(b[len(b):len(b)+binary.MaxVarintLen32], uint64(nodeID)) return DBytes(b[:len(b)+n]) }
// setAppliedIndex persists a new applied index. func setAppliedIndex(eng engine.Engine, rangeID roachpb.RangeID, appliedIndex uint64) error { return engine.MVCCPut(eng, nil, /* stats */ keys.RaftAppliedIndexKey(rangeID), roachpb.ZeroTimestamp, roachpb.MakeValueFromBytes(encoding.EncodeUint64(nil, appliedIndex)), nil /* txn */) }
// SimulateNetwork runs until the simCallback returns false. // // At each cycle, every node gossips a key equal to its address (unique) // with the cycle as the value. The received cycle value can be used // to determine the aging of information between any two nodes in the // network. // // At each cycle of the simulation, node 0 gossips the sentinel. // // The simulation callback receives the cycle and the network as arguments. func (n *Network) SimulateNetwork(simCallback func(cycle int, network *Network) bool) { nodes := n.Nodes for cycle := 1; simCallback(cycle, n); cycle++ { // Node 0 gossips sentinel every cycle. if err := nodes[0].Gossip.AddInfo(gossip.KeySentinel, encoding.EncodeUint64(nil, uint64(cycle)), time.Hour); err != nil { log.Fatal(err) } // Every node gossips cycle. for _, node := range nodes { if err := node.Gossip.AddInfo(node.Server.Addr().String(), encoding.EncodeUint64(nil, uint64(cycle)), time.Hour); err != nil { log.Fatal(err) } node.Gossip.SimulationCycle() } time.Sleep(5 * time.Millisecond) } }
// ResponseCacheKey returns a range-local key by Raft ID for a // response cache entry, with detail specified by encoding the // supplied client command ID. func ResponseCacheKey(raftID int64, cmdID *proto.ClientCmdID) proto.Key { detail := proto.Key{} if cmdID != nil { detail = encoding.EncodeUvarint(nil, uint64(cmdID.WallTime)) // wall time helps sort for locality detail = encoding.EncodeUint64(detail, uint64(cmdID.Random)) } return MakeRangeIDKey(raftID, KeyLocalResponseCacheSuffix, detail) }
// ResponseCacheKey returns a range-local key by Range ID for a // response cache entry, with detail specified by encoding the // supplied client command ID. func ResponseCacheKey(rangeID roachpb.RangeID, cmdID *roachpb.ClientCmdID) roachpb.Key { detail := roachpb.RKey{} if cmdID != nil { // Wall time helps sort for locality. detail = encoding.EncodeUvarint(nil, uint64(cmdID.WallTime)) detail = encoding.EncodeUint64(detail, uint64(cmdID.Random)) } return MakeRangeIDKey(rangeID, LocalResponseCacheSuffix, detail) }
// computeChecksum computes a checksum based on the provided key and // the contents of the value. If the value contains a byte slice, the // checksum includes it directly; if the value contains an integer, // the checksum includes the integer as 8 bytes in big-endian order. func (v *Value) computeChecksum(key []byte) uint32 { c := encoding.NewCRC32Checksum(key) if v.Bytes != nil { c.Write(v.Bytes) } else if v.Integer != nil { c.Write(encoding.EncodeUint64(nil, uint64(v.GetInteger()))) } return c.Sum32() }
// NewNetwork creates nodeCount gossip nodes. The networkType should // be set to either "tcp" or "unix". func NewNetwork(nodeCount int, networkType string) *Network { clock := hlc.NewClock(hlc.UnixNano) log.Infof("simulating gossip network with %d nodes", nodeCount) stopper := stop.NewStopper() rpcContext := rpc.NewContext(&base.Context{Insecure: true}, clock, stopper) tlsConfig, err := rpcContext.GetServerTLSConfig() if err != nil { log.Fatal(err) } nodes := make([]*Node, nodeCount) for i := range nodes { server := rpc.NewServer(rpcContext) testAddr := util.CreateTestAddr(networkType) ln, err := util.ListenAndServe(stopper, server, testAddr, tlsConfig) if err != nil { log.Fatal(err) } nodes[i] = &Node{Server: server, Addr: ln.Addr()} } for i, leftNode := range nodes { // Build new resolvers for each instance or we'll get data races. resolvers := []resolver.Resolver{resolver.NewResolverFromAddress(nodes[0].Addr)} gossipNode := gossip.New(rpcContext, resolvers) addr := leftNode.Addr gossipNode.SetNodeID(roachpb.NodeID(i + 1)) if err := gossipNode.SetNodeDescriptor(&roachpb.NodeDescriptor{ NodeID: roachpb.NodeID(i + 1), Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), }); err != nil { log.Fatal(err) } if err := gossipNode.AddInfo(addr.String(), encoding.EncodeUint64(nil, 0), time.Hour); err != nil { log.Fatal(err) } gossipNode.Start(leftNode.Server, addr, stopper) gossipNode.EnableSimulationCycler(true) leftNode.Gossip = gossipNode } return &Network{ Nodes: nodes, NetworkType: networkType, Stopper: stopper, } }
// SimulateNetwork runs until the simCallback returns false. // // At each cycle, every node gossips a key equal to its address (unique) // with the cycle as the value. The received cycle value can be used // to determine the aging of information between any two nodes in the // network. // // At each cycle of the simulation, node 0 gossips the sentinel. // // The simulation callback receives the cycle and the network as arguments. func (n *Network) SimulateNetwork(simCallback func(cycle int, network *Network) bool) { nodes := n.Nodes for cycle := 1; simCallback(cycle, n); cycle++ { // Node 0 gossips sentinel & cluster ID every cycle. if err := nodes[0].Gossip.AddInfo(gossip.KeySentinel, encoding.EncodeUint64(nil, uint64(cycle)), time.Hour); err != nil { log.Fatal(err) } if err := nodes[0].Gossip.AddInfo(gossip.KeyClusterID, encoding.EncodeUint64(nil, uint64(cycle)), 0*time.Second); err != nil { log.Fatal(err) } // Every node gossips cycle. for _, node := range nodes { if err := node.Gossip.AddInfo(node.Addr.String(), encoding.EncodeUint64(nil, uint64(cycle)), time.Hour); err != nil { log.Fatal(err) } node.Gossip.SimulationCycle() } time.Sleep(5 * time.Millisecond) } log.Infof("gossip network simulation: total infos sent=%d, received=%d", n.InfosSent(), n.InfosReceived()) }
// StartNode initializes a gossip instance for the simulation node and // starts it. func (n *Network) StartNode(node *Node) error { n.nodeIDAllocator++ node.Gossip.SetNodeID(n.nodeIDAllocator) if err := node.Gossip.SetNodeDescriptor(&roachpb.NodeDescriptor{ NodeID: node.Gossip.GetNodeID(), Address: util.MakeUnresolvedAddr(node.Addr.Network(), node.Addr.String()), }); err != nil { return err } if err := node.Gossip.AddInfo(node.Addr.String(), encoding.EncodeUint64(nil, 0), time.Hour); err != nil { return err } node.Gossip.Start(node.Server, node.Addr, n.Stopper) node.Gossip.EnableSimulationCycler(true) return nil }
// SimulateNetwork runs a number of gossipInterval periods within the // given Network. After each gossipInterval period, simCallback is // invoked. When it returns false, the simulation ends. If it returns // true, the simulation continues another cycle. // // Node0 gossips the node count as well as the gossip sentinel. The // gossip bootstrap hosts are set to the first three nodes (or fewer // if less than three are available). // // At each cycle of the simulation, node 0 gossips the sentinel. If // the simulation requires other nodes to gossip, this should be done // via simCallback. // // The simulation callback receives a map of nodes, keyed by node address. func (n *Network) SimulateNetwork( simCallback func(cycle int, network *Network) bool) { nodes := n.Nodes var complete bool for cycle := 0; !complete; cycle++ { select { case <-time.After(n.GossipInterval): // Node 0 gossips sentinel every cycle. if err := nodes[0].Gossip.AddInfo(gossip.KeySentinel, encoding.EncodeUint64(nil, uint64(cycle)), time.Hour); err != nil { log.Fatal(err) } if !simCallback(cycle, n) { complete = true } } } }
// RunUntilFullyConnected blocks until the gossip network has received // gossip from every other node in the network. It returns the gossip // cycle at which the network became fully connected. func (n *Network) RunUntilFullyConnected() int { var connectedAtCycle int n.SimulateNetwork(func(cycle int, network *Network) bool { // Every node should gossip. for _, node := range network.Nodes { if err := node.Gossip.AddInfo(node.Server.Addr().String(), encoding.EncodeUint64(nil, uint64(cycle)), time.Hour); err != nil { log.Fatal(err) } } if network.isNetworkConnected() { connectedAtCycle = cycle return false } return true }) return connectedAtCycle }
// SetInteger encodes the specified int64 value into the bytes field of the // receiver. func (v *Value) SetInteger(i int64) { v.Bytes = encoding.EncodeUint64(nil, uint64(i)) }
func makeCmdIDKey(cmdID proto.ClientCmdID) cmdIDKey { buf := make([]byte, 0, 16) buf = encoding.EncodeUint64(buf, uint64(cmdID.WallTime)) buf = encoding.EncodeUint64(buf, uint64(cmdID.Random)) return cmdIDKey(string(buf)) }
// RaftLogKey returns a system-local key for a Raft log entry. func RaftLogKey(rangeID roachpb.RangeID, logIndex uint64) roachpb.Key { return MakeRangeIDKey(rangeID, localRaftLogSuffix, encoding.EncodeUint64(nil, logIndex)) }
// setLastIndex persists a new last index. func setLastIndex(eng engine.Engine, rangeID roachpb.RangeID, lastIndex uint64) error { return engine.MVCCPut(eng, nil, keys.RaftLastIndexKey(rangeID), roachpb.ZeroTimestamp, roachpb.MakeValueFromBytes(encoding.EncodeUint64(nil, lastIndex)), nil) }
// SetFloat encodes the specified float64 value into the bytes field of the // receiver, sets the tag and clears the checksum. func (v *Value) SetFloat(f float64) { v.RawBytes = make([]byte, headerSize+8) encoding.EncodeUint64(v.RawBytes[headerSize:headerSize], math.Float64bits(f)) v.setTag(ValueType_FLOAT) }
// SetFloat encodes the specified float64 value into the bytes field of the // receiver and sets the tag. func (v *Value) SetFloat(f float64) { v.RawBytes = encoding.EncodeUint64(nil, math.Float64bits(f)) v.Tag = ValueType_FLOAT }
// fillResults copies the outcome of a batch into b.Results and returns the
// first error recorded on any result, or nil if there is none.
//
// Requests are walked in order: each result owns result.calls consecutive
// entries of b.reqs, and offset tracks where the current result's requests
// begin. The response for a request is looked up at the same index in
// br.Responses. pErr is the batch-level error; it is copied into every
// result that does not already carry an error.
//
// The function panics if a response is missing for any request other than
// EndTransaction.
func (b *Batch) fillResults(br *roachpb.BatchResponse, pErr *roachpb.Error) error {
	offset := 0
	for i := range b.Results {
		result := &b.Results[i]

		for k := 0; k < result.calls; k++ {
			args := b.reqs[offset+k]

			// reply stays nil when the result already has an error, when
			// pErr yields an error, or when the response was elided.
			var reply roachpb.Response
			if result.Err == nil {
				// NOTE(review): assumes GoError is safe to call on a nil
				// *roachpb.Error — confirm.
				result.Err = pErr.GoError()
				if result.Err == nil {
					if offset+k < len(br.Responses) {
						reply = br.Responses[offset+k].GetValue().(roachpb.Response)
					} else if args.Method() != roachpb.EndTransaction {
						// TODO(tschottdorf): EndTransaction is excepted here
						// because it may be elided (r/o txns). Might prefer to
						// simulate an EndTransaction response instead; this
						// effectively just leaks here.
						panic("not enough responses for calls")
					}
				}
			}

			// Dispatch on the request type. The row key is always taken from
			// the request; values and timestamps are only filled in when the
			// result has no error, since reply is only set in that case.
			switch req := args.(type) {
			case *roachpb.GetRequest:
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					row.Value = reply.(*roachpb.GetResponse).Value
				}
			case *roachpb.PutRequest:
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					// The value reported is the one that was sent.
					row.Value = &req.Value
					row.setTimestamp(reply.(*roachpb.PutResponse).Timestamp)
				}
			case *roachpb.ConditionalPutRequest:
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					// The value reported is the one that was sent.
					row.Value = &req.Value
					row.setTimestamp(reply.(*roachpb.ConditionalPutResponse).Timestamp)
				}
			case *roachpb.IncrementRequest:
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					t := reply.(*roachpb.IncrementResponse)
					// Wrap the new counter value as an INT-tagged value.
					row.Value = &roachpb.Value{
						Bytes: encoding.EncodeUint64(nil, uint64(t.NewValue)),
						Tag:   roachpb.ValueType_INT,
					}
					row.setTimestamp(t.Timestamp)
				}
			case *roachpb.ScanRequest:
				if result.Err == nil {
					t := reply.(*roachpb.ScanResponse)
					// Scans replace the result's rows wholesale; each row
					// points at the value stored in the response.
					result.Rows = make([]KeyValue, len(t.Rows))
					for j := range t.Rows {
						src := &t.Rows[j]
						dst := &result.Rows[j]
						dst.Key = src.Key
						dst.Value = &src.Value
					}
				}
			case *roachpb.ReverseScanRequest:
				if result.Err == nil {
					t := reply.(*roachpb.ReverseScanResponse)
					// Same handling as a forward scan.
					result.Rows = make([]KeyValue, len(t.Rows))
					for j := range t.Rows {
						src := &t.Rows[j]
						dst := &result.Rows[j]
						dst.Key = src.Key
						dst.Value = &src.Value
					}
				}
			case *roachpb.DeleteRequest:
				// Only the key is recorded for deletes.
				row := &result.Rows[k]
				row.Key = []byte(args.(*roachpb.DeleteRequest).Key)

			case *roachpb.DeleteRangeRequest:
			case *roachpb.EndTransactionRequest:
			case *roachpb.AdminMergeRequest:
			case *roachpb.AdminSplitRequest:
			case *roachpb.HeartbeatTxnRequest:
			case *roachpb.GCRequest:
			case *roachpb.PushTxnRequest:
			case *roachpb.RangeLookupRequest:
			case *roachpb.ResolveIntentRequest:
			case *roachpb.ResolveIntentRangeRequest:
			case *roachpb.MergeRequest:
			case *roachpb.TruncateLogRequest:
			case *roachpb.LeaderLeaseRequest:
				// Nothing to do for these methods as they do not generate any
				// rows.

			default:
				// An unrecognized request type only becomes the result's
				// error if no earlier error was recorded. Note the message
				// reports the reply's type, not the request's.
				if result.Err == nil {
					result.Err = fmt.Errorf("unsupported reply: %T", reply)
				}
			}
		}
		offset += result.calls
	}

	// Report the first error found across the results, in order.
	for i := range b.Results {
		result := &b.Results[i]
		if result.Err != nil {
			return result.Err
		}
	}
	return nil
}
// SetInt encodes the specified int64 value into the bytes field of the // receiver and sets the tag. func (v *Value) SetInt(i int64) { v.RawBytes = encoding.EncodeUint64(nil, uint64(i)) v.Tag = ValueType_INT }
// RaftLogKey returns a system-local key for a Raft log entry. func RaftLogKey(rangeID proto.RangeID, logIndex uint64) proto.Key { return MakeRangeIDKey(rangeID, LocalRaftLogSuffix, encoding.EncodeUint64(nil, logIndex)) }
// setLastIndex persists a new last index. func setLastIndex(eng engine.Engine, raftID proto.RaftID, lastIndex uint64) error { return engine.MVCCPut(eng, nil, keys.RaftLastIndexKey(raftID), proto.ZeroTimestamp, proto.Value{ Bytes: encoding.EncodeUint64(nil, lastIndex), }, nil) }
// fillResults copies the replies of the batch's calls into b.Results and
// returns the first error recorded on any result, or nil if there is none.
//
// Calls are walked in order: each result owns result.calls consecutive
// entries of b.calls, and offset tracks where the current result's calls
// begin. A result keeps the first error seen among its calls.
func (b *Batch) fillResults() error {
	offset := 0
	for i := range b.Results {
		result := &b.Results[i]

		for k := 0; k < result.calls; k++ {
			call := b.calls[offset+k]

			// Take the error from the reply header unless an earlier call of
			// this result already failed.
			if result.Err == nil {
				result.Err = call.Reply.Header().GoError()
			}

			// Dispatch on the reply type. Row keys come from the matching
			// request; values and timestamps are only set when the result
			// has no error.
			switch t := call.Reply.(type) {
			case *proto.GetResponse:
				row := &result.Rows[k]
				row.Key = []byte(call.Args.(*proto.GetRequest).Key)
				if result.Err == nil {
					row.Value = t.Value
				}
			case *proto.PutResponse:
				req := call.Args.(*proto.PutRequest)
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					// The value reported is the one that was sent.
					row.Value = &req.Value
					row.setTimestamp(t.Timestamp)
				}
			case *proto.ConditionalPutResponse:
				req := call.Args.(*proto.ConditionalPutRequest)
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					// The value reported is the one that was sent.
					row.Value = &req.Value
					row.setTimestamp(t.Timestamp)
				}
			case *proto.IncrementResponse:
				row := &result.Rows[k]
				row.Key = []byte(call.Args.(*proto.IncrementRequest).Key)
				if result.Err == nil {
					// Wrap the new counter value as an INT-tagged value.
					row.Value = &proto.Value{
						Bytes: encoding.EncodeUint64(nil, uint64(t.NewValue)),
						Tag:   proto.ValueType_INT,
					}
					row.setTimestamp(t.Timestamp)
				}
			case *proto.ScanResponse:
				// Scans replace the result's rows wholesale, regardless of
				// the result's error state; each row points at the value
				// stored in the reply.
				result.Rows = make([]KeyValue, len(t.Rows))
				for j := range t.Rows {
					src := &t.Rows[j]
					dst := &result.Rows[j]
					dst.Key = src.Key
					dst.Value = &src.Value
				}
			case *proto.ReverseScanResponse:
				// Same handling as a forward scan.
				result.Rows = make([]KeyValue, len(t.Rows))
				for j := range t.Rows {
					src := &t.Rows[j]
					dst := &result.Rows[j]
					dst.Key = src.Key
					dst.Value = &src.Value
				}
			case *proto.DeleteResponse:
				// Only the key is recorded for deletes.
				row := &result.Rows[k]
				row.Key = []byte(call.Args.(*proto.DeleteRequest).Key)

			case *proto.DeleteRangeResponse:
			case *proto.EndTransactionResponse:
			case *proto.AdminMergeResponse:
			case *proto.AdminSplitResponse:
			case *proto.HeartbeatTxnResponse:
			case *proto.GCResponse:
			case *proto.PushTxnResponse:
			case *proto.RangeLookupResponse:
			case *proto.ResolveIntentResponse:
			case *proto.ResolveIntentRangeResponse:
			case *proto.MergeResponse:
			case *proto.TruncateLogResponse:
			case *proto.LeaderLeaseResponse:
			case *proto.BatchResponse:
				// Nothing to do for these methods as they do not generate any
				// rows.

			default:
				// An unrecognized reply type only becomes the result's error
				// if no earlier error was recorded.
				if result.Err == nil {
					result.Err = fmt.Errorf("unsupported reply: %T", call.Reply)
				}
			}
		}
		offset += result.calls
	}

	// Report the first error found across the results, in order.
	for i := range b.Results {
		result := &b.Results[i]
		if result.Err != nil {
			return result.Err
		}
	}
	return nil
}