Ejemplo n.º 1
func GroupWrite(c *GroupClientConfig) {
	defer c.wg.Done()
	var err error
	var opts []grpc.DialOption
	var creds credentials.TransportAuthenticator
	creds = credentials.NewTLS(&tls.Config{
		InsecureSkipVerify: true,
	opts = append(opts, grpc.WithTransportCredentials(creds))
	conn, err := grpc.Dial(c.addr, opts...)
	if err != nil {
		log.Fatalln(fmt.Sprintf("Failed to dial server: %s", err))
	defer conn.Close()
	client := gp.NewGroupStoreClient(conn)
	w := &gp.WriteRequest{
		Value: *c.value,
	empty := []byte("")
	for i, _ := range c.wm {
		ctx, _ := context.WithTimeout(context.Background(), 10*time.Second)
		c.wm[i].Value = *c.value
		w.TimestampMicro = brimtime.TimeToUnixMicro(time.Now())
		res, err := client.Write(ctx, c.wm[i])
		if err != nil {
			log.Println("Client", c.id, ":", err)
		if res.TimestampMicro > w.TimestampMicro {
			log.Printf("TSM is newer than attempted, Key %d-%d Got %s, Sent: %s", c.id, i, brimtime.UnixMicroToTime(res.TimestampMicro), brimtime.UnixMicroToTime(w.TimestampMicro))
		c.wm[i].Value = empty
Ejemplo n.º 2
// RevokeAddrFS ...
func (s *FileSystemAPIServer) RevokeAddrFS(ctx context.Context, r *pb.RevokeAddrFSRequest) (*pb.RevokeAddrFSResponse, error) {
	var err error
	srcAddr := ""

	// Get incomming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		srcAddr = pr.Addr.String()
	// Validate Token
	_, err = s.validateToken(r.Token)
	if err != nil {
		log.Printf("%s REVOKE FAILED %s\n", srcAddr, "PermissionDenied")
		return nil, errf(codes.PermissionDenied, "%v", "Invalid Token")

	// REVOKE an file system entry for the addr
	// 		delete /fs/FSID/addr			addr						AddrRef
	pKey := fmt.Sprintf("/fs/%s/addr", r.FSid)
	pKeyA, pKeyB := murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB := murmur3.Sum128([]byte(r.Addr))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	_, err = s.gstore.Delete(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro)
	if store.IsNotFound(err) {
		log.Printf("%s REVOKE FAILED %s %s\n", srcAddr, r.FSid, r.Addr)
		return nil, errf(codes.NotFound, "%v", "Not Found")

	// return Addr was revoked
	// Log Operation
	log.Printf("%s REVOKE SUCCESS %s %s\n", srcAddr, r.FSid, r.Addr)
	return &pb.RevokeAddrFSResponse{Data: r.FSid}, nil
Ejemplo n.º 3
// RevokeAddrFS ...
func (s *FileSystemAPIServer) RevokeAddrFS(ctx context.Context, r *pb.RevokeAddrFSRequest) (*pb.RevokeAddrFSResponse, error) {
	var err error
	var acctID string
	var value []byte
	var fsRef FileSysRef
	srcAddr := ""

	// Get incomming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		srcAddr = pr.Addr.String()
	// Validate Token
	acctID, err = s.validateToken(r.Token)
	if err != nil {
		log.Printf("%s REVOKE FAILED %s\n", srcAddr, "PermissionDenied")
		return nil, errf(codes.PermissionDenied, "%v", "Invalid Token")
	// Validate Token/Account owns this file system
	// Read FileSysRef entry to determine if it exists
	pKey := fmt.Sprintf("/fs")
	pKeyA, pKeyB := murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB := murmur3.Sum128([]byte(r.FSid))
	_, value, err = s.gstore.Read(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, nil)
	if store.IsNotFound(err) {
		log.Printf("%s REVOKE FAILED %s NOTFOUND", srcAddr, r.FSid)
		return nil, errf(codes.NotFound, "%v", "Not Found")
	if err != nil {
		log.Printf("%s REVOKE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	err = json.Unmarshal(value, &fsRef)
	if err != nil {
		log.Printf("%s REVOKE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	if fsRef.AcctID != acctID {
		log.Printf("$s REVOKE FAILED %v ACCOUNT MISMATCH", r.FSid)
		return nil, errf(codes.FailedPrecondition, "%v", "Account Mismatch")

	// REVOKE an file system entry for the addr
	// 		delete /fs/FSID/addr			addr						AddrRef
	pKey = fmt.Sprintf("/fs/%s/addr", r.FSid)
	pKeyA, pKeyB = murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB = murmur3.Sum128([]byte(r.Addr))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	_, err = s.gstore.Delete(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro)
	if store.IsNotFound(err) {
		log.Printf("%s REVOKE FAILED %s %s\n", srcAddr, r.FSid, r.Addr)
		return nil, errf(codes.NotFound, "%v", "Not Found")

	// return Addr was revoked
	// Log Operation
	log.Printf("%s REVOKE SUCCESS %s %s\n", srcAddr, r.FSid, r.Addr)
	return &pb.RevokeAddrFSResponse{Data: r.FSid}, nil
Ejemplo n.º 4
func VSTests() {
	vsconfigs := make([]ValueClientConfig, *clients)
	var wg sync.WaitGroup
	for w := 0; w < *clients; w++ {
		vsconfigs[w].addr = *vsServer
		vsconfigs[w].id = w
		vsconfigs[w].count = perClient
		vsconfigs[w].value = &value
		vsconfigs[w].wg = &wg
		vsconfigs[w].wm = make([]*vp.WriteRequest, perClient)
		vsconfigs[w].rm = make([]*vp.ReadRequest, perClient)
		for k := 0; k < perClient; k++ {
			vsconfigs[w].wm[k] = &vp.WriteRequest{}
			vsconfigs[w].rm[k] = &vp.ReadRequest{}
			vsconfigs[w].wm[k].KeyA, vsconfigs[w].wm[k].KeyB = murmur3.Sum128([]byte(fmt.Sprintf("somethingtestkey%d-%d", vsconfigs[w].id, k)))
			vsconfigs[w].wm[k].TimestampMicro = brimtime.TimeToUnixMicro(time.Now())
			vsconfigs[w].rm[k].KeyA = vsconfigs[w].wm[k].KeyA
			vsconfigs[w].rm[k].KeyB = vsconfigs[w].wm[k].KeyB
	log.Println("ValueStore Key/hash generation complete. Spawning tests.")

	// ValueStore Tests
	if *vsWriteTest {
		t := time.Now()
		for w := 0; w < *clients; w++ {
			if *streamTest {
				go ValueStreamWrite(&vsconfigs[w])
			} else {
				go ValueWrite(&vsconfigs[w])
		log.Println("Issued", *clients*perClient, "VS WRITES")
		ts := time.Since(t).Seconds()
		log.Println("Total run time was:", ts, "seconds")
		log.Printf("Per second: %.2f\n", float64(*clients*perClient)/ts)
	if *vsReadTest {
		t := time.Now()
		for w := 0; w < *clients; w++ {
			if *streamTest {
				go ValueStreamRead(&vsconfigs[w])
			} else {
				go ValueRead(&vsconfigs[w])
		log.Println("Issued", *clients*perClient, "VS READS")
		ts := time.Since(t).Seconds()
		log.Println("Total run time was:", ts, "seconds")
		log.Printf("Per second: %.2f\n", float64(*clients*perClient)/ts)
Ejemplo n.º 5
// GrantAddrFS records r.Addr as an address entry for file system r.FSid.
//
// It looks up the account r.Acctnum via s.getAcct, compares the stored token
// with r.Token, looks up the file system via s.getFS, and then writes a
// JSON-encoded AddrPayLoad to the group store under "/fs/<r.FSid>/addr",
// keyed by r.Addr. On success it returns a human-readable status string.
//
// NOTE(review): this listing has no closing braces and the body of the
// `if ok {` statement below is absent; it looks like lines were lost during
// extraction. Restore them from the upstream source before compiling.
func (s *FileSystemAPIServer) GrantAddrFS(ctx context.Context, r *fb.GrantAddrFSRequest) (*fb.GrantAddrFSResponse, error) {
	var status string
	var err error
	var acctData AcctPayLoad
	var fsData FileSysPayLoad
	var addrData AddrPayLoad
	var dataB []byte
	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
	// getAcct data
	acctData, err = s.getAcct("/acct", r.Acctnum)
	if err != nil {
		log.Printf("Error %v on lookup for account %s", err, r.Acctnum)
		return nil, err
	// validate token
	if acctData.Token != r.Token {
		return nil, errf(codes.PermissionDenied, "%s", "Invalid Token")
	// getFS data
	fs := fmt.Sprintf("/acct/%s/fs", r.Acctnum)
	fsData, err = s.getFS(fs, r.FSid)
	if err != nil {
		// NOTE(review): the message prints r.Acctnum although the lookup is
		// keyed by r.FSid — possibly unintended.
		log.Printf("Error %v on lookup for File system %s", err, r.Acctnum)
		return nil, err
	// The status is only logged; nothing visible here rejects an inactive
	// file system.
	if fsData.Status == "active" {
		log.Println("FileSystem is active")
	// write out the ip address
	parentKey := fmt.Sprintf("/fs/%s/addr", r.FSid)
	childKey := r.Addr

	parentKeyA, parentKeyB := murmur3.Sum128([]byte(parentKey))
	childKeyA, childKeyB := murmur3.Sum128([]byte(childKey))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	addrData.Addr = r.Addr
	dataB, err = json.Marshal(addrData)
	if err != nil {
		log.Printf("Marshal Error: %v\n...", err)
		return nil, errf(codes.Internal, "%v", err)
	_, err = s.fsws.gstore.Write(context.Background(), parentKeyA, parentKeyB, childKeyA, childKeyB, timestampMicro, dataB)
	if err != nil {
		log.Printf("Write Error: %v", err)
		return nil, errf(codes.Internal, "%v", err)

	// Build the status message returned to the caller
	status = fmt.Sprintf("addr %s for filesystem %s with account id %s was granted", r.Addr, r.FSid, r.Acctnum)
	return &fb.GrantAddrFSResponse{Status: status}, nil
Ejemplo n.º 6
func (o *StoreComms) WriteValue(ctx context.Context, id, data []byte) error {
	keyA, keyB := murmur3.Sum128(id)
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	oldTimestampMicro, err := o.vstore.Write(ctx, keyA, keyB, timestampMicro, data)
	if err != nil {
		return err
	if oldTimestampMicro >= timestampMicro {
		return ErrStoreHasNewerValue
	return nil
Ejemplo n.º 7
// writeGStore ...
func (fsws *FileSystemWS) writeGStore(g string, m string, p []byte) (string, error) {
	// prepare groupVal and memberVal
	log.Println("Starting a Write to the Group Store")
	keyA, keyB := murmur3.Sum128([]byte(g))
	childKeyA, childKeyB := murmur3.Sum128([]byte(m))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	newTimestampMicro, err := fsws.gstore.Write(context.Background(), keyA, keyB, childKeyA, childKeyB, timestampMicro, p)
	if err != nil {
		return "", err
	log.Println("Successfully wrote something to the Group Store")
	return fmt.Sprintf("TSM: %d", newTimestampMicro), nil
Ejemplo n.º 8
func (o *OortFS) Remove(ctx context.Context, parent []byte, name string) (int32, error) {
	v, err := o.validateIP(ctx)
	if err != nil {
		return 1, err
	if !v {
		return 1, errors.New("Unknown or unauthorized FS use")
	// Get the ID from the group list
	b, err := o.comms.ReadGroupItem(ctx, parent, []byte(name))
	if store.IsNotFound(err) {
		return 1, nil
	} else if err != nil {
		return 1, err
	d := &pb.DirEntry{}
	err = proto.Unmarshal(b, d)
	if err != nil {
		return 1, err
	// TODO: More error handling needed
	// TODO: Handle possible race conditions where user writes and deletes the same file over and over
	// Mark the item deleted in the group
	t := &pb.Tombstone{}
	tsm := brimtime.TimeToUnixMicro(time.Now())
	t.Dtime = tsm
	t.Qtime = tsm
	t.FsId = []byte("1") // TODO: Make sure this gets set when we are tracking fsids
	inode, err := o.GetInode(ctx, d.Id)
	if err != nil {
		return 1, err
	t.Blocks = inode.Blocks
	t.Inode = inode.Inode
	d.Tombstone = t
	b, err = proto.Marshal(d)
	if err != nil {
		return 1, err
	// NOTE: The tsm-1 is kind of a hack because the timestamp needs to be updated on this write, but if we choose tsm, once the actual delete comes through, it will not work because it is going to try to delete with a timestamp of tsm.
	err = o.comms.WriteGroupTS(ctx, parent, []byte(name), b, tsm-1)
	if err != nil {
		return 1, err // Not really sure what should be done here to try to recover from err
	o.deleteChan <- &DeleteItem{
		parent: parent,
		name:   name,
	return 0, nil
Ejemplo n.º 9
// GrantAddrFS ...
func (s *FileSystemAPIServer) GrantAddrFS(ctx context.Context, r *pb.GrantAddrFSRequest) (*pb.GrantAddrFSResponse, error) {
	var err error
	var addrData AddrRef
	var addrByte []byte
	srcAddr := ""

	// Get incomming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		srcAddr = pr.Addr.String()
	// validate token
	_, err = s.validateToken(r.Token)
	if err != nil {
		log.Printf("%s GRANT FAILED %s\n", srcAddr, "PermissionDenied")
		return nil, errf(codes.PermissionDenied, "%v", "Invalid Token")

	// GRANT an file system entry for the addr
	// 		write /fs/FSID/addr			addr						AddrRef
	pKey := fmt.Sprintf("/fs/%s/addr", r.FSid)
	pKeyA, pKeyB := murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB := murmur3.Sum128([]byte(r.Addr))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	addrData.Addr = r.Addr
	addrData.FSID = r.FSid
	addrByte, err = json.Marshal(addrData)
	if err != nil {
		log.Printf("%s GRANT FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	_, err = s.gstore.Write(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro, addrByte)
	if err != nil {
		log.Printf("%s GRANT FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)

	// return Addr was Granted
	// Log Operation
	log.Printf("%s GRANT SUCCESS %s %s\n", srcAddr, r.FSid, r.Addr)
	return &pb.GrantAddrFSResponse{Data: r.FSid}, nil
Ejemplo n.º 10
// RevokeAddrFS deletes the address entry r.Addr stored under
// "/fs/<r.FSid>/addr" in the group store.
//
// It looks up the account r.Acctnum via s.getAcct (NotFound on failure),
// compares the stored token with r.Token (PermissionDenied on mismatch), and
// then issues the delete. A missing entry yields NotFound and any other store
// error yields Internal. On success it returns a human-readable status
// string.
//
// NOTE(review): this listing has no closing braces and the body of the
// `if ok {` statement below is absent; it looks like lines were lost during
// extraction. Restore them from the upstream source before compiling.
func (s *FileSystemAPIServer) RevokeAddrFS(ctx context.Context, r *fb.RevokeAddrFSRequest) (*fb.RevokeAddrFSResponse, error) {
	var status string
	var err error
	var acctData AcctPayLoad
	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
	// getAcct data
	acctData, err = s.getAcct("/acct", r.Acctnum)
	if err != nil {
		log.Printf("Error %v on lookup for account %s", err, r.Acctnum)
		return nil, errf(codes.NotFound, "%v", err)
	// validate token
	if acctData.Token != r.Token {
		return nil, errf(codes.PermissionDenied, "%s", "Invalid Token")
	// Group key is the file system's addr list; child key is the address.
	parentKey := fmt.Sprintf("/fs/%s/addr", r.FSid)
	childKey := r.Addr

	parentKeyA, parentKeyB := murmur3.Sum128([]byte(parentKey))
	childKeyA, childKeyB := murmur3.Sum128([]byte(childKey))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	// Delete addr
	_, err = s.fsws.gstore.Delete(context.Background(), parentKeyA, parentKeyB, childKeyA, childKeyB, timestampMicro)
	if store.IsNotFound(err) {
		log.Printf("/fs/%s/addr/%s did not exist to delete", r.FSid, r.Addr)
		return nil, errf(codes.NotFound, "%s", "Addr not found")
	} else if err != nil {
		return nil, errf(codes.Internal, "%s", err)
	// Build the status message returned to the caller
	status = fmt.Sprintf("addr %s for filesystem %s with account id %s was revoked", r.Addr, r.FSid, r.Acctnum)
	return &fb.RevokeAddrFSResponse{Status: status}, nil
// outPullReplicationPass performs one outgoing pull-replication pass.
//
// It returns nil immediately when there is no message ring, no ring, or the
// ring has fewer than two replicas or nodes. Otherwise it starts one worker
// goroutine per configured out-worker; each worker walks the partitions
// (starting at a worker-specific offset and wrapping around) and, for every
// partition the local node is responsible for, scans its share of the key
// range into a bloom filter and sends pull-replication messages to the other
// replicas. The function returns the notification received on notifyChan (after
// raising the abort flag) or nil once waitChan fires.
//
// NOTE(review): this listing has no closing braces and several block bodies
// are empty (for example the `} else {` after resetting outIteration, the
// abort/ring-version checks, and the `go func() {` before the select). It
// looks like lines were lost during extraction; restore them from the
// upstream source before compiling.
func (store *defaultValueStore) outPullReplicationPass(notifyChan chan *bgNotification) *bgNotification {
	if store.msgRing == nil {
		return nil
	ring := store.msgRing.Ring()
	if ring == nil || ring.ReplicaCount() < 2 || ring.NodeCount() < 2 {
		return nil
	begin := time.Now()
	// Record how long the pass took, both in the debug log and in an
	// atomically stored counter.
	defer func() {
		elapsed := time.Now().Sub(begin)
		store.logDebug("outPullReplication: pass took %s", elapsed)
		atomic.StoreInt64(&store.outPullReplicationNanoseconds, elapsed.Nanoseconds())
	rightwardPartitionShift := 64 - ring.PartitionBitCount()
	partitionCount := uint64(1) << ring.PartitionBitCount()
	// Wrap the iteration counter before it would overflow a uint16.
	if store.pullReplicationState.outIteration == math.MaxUint16 {
		store.pullReplicationState.outIteration = 0
	} else {
	ringVersion := ring.Version()
	ws := store.pullReplicationState.outWorkers
	// Make sure there is one bloom filter per worker.
	for uint64(len(store.pullReplicationState.outKTBFs)) < ws {
		store.pullReplicationState.outKTBFs = append(store.pullReplicationState.outKTBFs, newValueKTBloomFilter(store.pullReplicationState.outBloomN, store.pullReplicationState.outBloomP, 0))
	var abort uint32
	// f handles worker w's slice of partition p: rb..re is the key range.
	f := func(p uint64, w uint64, ktbf *valueKTBloomFilter) {
		pb := p << rightwardPartitionShift
		rb := pb + ((uint64(1) << rightwardPartitionShift) / ws * w)
		var re uint64
		// The last worker of a partition takes the range up to the
		// partition's end; the last partition ends at MaxUint64.
		if w+1 == ws {
			if p+1 == partitionCount {
				re = math.MaxUint64
			} else {
				re = ((p + 1) << rightwardPartitionShift) - 1
		} else {
			re = pb + ((uint64(1) << rightwardPartitionShift) / ws * (w + 1)) - 1
		timestampbitsnow := uint64(brimtime.TimeToUnixMicro(time.Now())) << _TSB_UTIL_BITS
		cutoff := timestampbitsnow - store.replicationIgnoreRecent
		var more bool
		for atomic.LoadUint32(&abort) == 0 {
			rbThis := rb
			// Every scanned entry is added to the bloom filter.
			rb, more = store.locmap.ScanCallback(rb, re, 0, _TSB_LOCAL_REMOVAL, cutoff, store.pullReplicationState.outBloomN, func(keyA uint64, keyB uint64, timestampbits uint64, length uint32) bool {
				ktbf.add(keyA, keyB, timestampbits)
				return true
			ring2 := store.msgRing.Ring()
			if ring2 == nil || ring2.Version() != ringVersion {
			reThis := re
			if more {
				reThis = rb - 1
			prm := store.newOutPullReplicationMsg(ringVersion, uint32(p), cutoff, rbThis, reThis, ktbf)
			atomic.AddInt32(&store.outPullReplications, 1)
			store.msgRing.MsgToOtherReplicas(prm, uint32(p), store.pullReplicationState.outMsgTimeout)
			if !more {
	wg := &sync.WaitGroup{}
	for w := uint64(0); w < ws; w++ {
		go func(w uint64) {
			ktbf := store.pullReplicationState.outKTBFs[w]
			// Each worker starts at a different partition, then wraps
			// around to cover the partitions before its starting point.
			pb := partitionCount / ws * w
			for p := pb; p < partitionCount; p++ {
				if atomic.LoadUint32(&abort) != 0 {
				ring2 := store.msgRing.Ring()
				if ring2 == nil || ring2.Version() != ringVersion {
				if ring.Responsible(uint32(p)) {
					f(p, w, ktbf)
			for p := uint64(0); p < pb; p++ {
				if atomic.LoadUint32(&abort) != 0 {
				ring2 := store.msgRing.Ring()
				if ring2 == nil || ring2.Version() != ringVersion {
				if ring.Responsible(uint32(p)) {
					f(p, w, ktbf)
	waitChan := make(chan struct{}, 1)
	go func() {
	select {
	case notification := <-notifyChan:
		// Signal the workers to stop and hand the notification back.
		atomic.AddUint32(&abort, 1)
		return notification
	case <-waitChan:
		return nil
// inPullReplication actually processes incoming pull-replication messages;
// there may be more than one of these workers.
//
// For each message taken from inMsgChan it scans the requested key range for
// entries the sender's bloom filter does not contain, hands the message back
// via inFreeMsgChan, and, if any keys were collected, reads their values into
// a bulk-set message that is sent to the requesting node.
//
// NOTE(review): this listing has no closing braces and several block bodies
// are empty (for example `if prm == nil {` and the branches inside the read
// loop). It looks like lines were lost during extraction; restore them from
// the upstream source before compiling.
func (store *defaultValueStore) inPullReplication(wg *sync.WaitGroup) {
	// k collects keyA/keyB pairs to send; v is a reusable value buffer.
	k := make([]uint64, store.bulkSetState.msgCap/_VALUE_BULK_SET_MSG_MIN_ENTRY_LENGTH*2)
	v := make([]byte, store.valueCap)
	for {
		prm := <-store.pullReplicationState.inMsgChan
		if prm == nil {
		if store.msgRing == nil {
			store.pullReplicationState.inFreeMsgChan <- prm
		ring := store.msgRing.Ring()
		if ring == nil {
			store.pullReplicationState.inFreeMsgChan <- prm
		k = k[:0]
		// This is what the remote system used when making its bloom filter,
		// computed via its config.ReplicationIgnoreRecent setting. We want to
		// use the exact same cutoff in our checks and possible response.
		cutoff := prm.cutoff()
		tombstoneCutoff := (uint64(brimtime.TimeToUnixMicro(time.Now())) << _TSB_UTIL_BITS) - store.tombstoneDiscardState.age
		ktbf := prm.ktBloomFilter()
		// l is the remaining byte budget for the response message.
		l := int64(store.bulkSetState.msgCap)
		callback := func(keyA uint64, keyB uint64, timestampbits uint64, length uint32) bool {
			// Consider live entries, and deletions that are not older than
			// the tombstone cutoff.
			if timestampbits&_TSB_DELETION == 0 || timestampbits >= tombstoneCutoff {
				if !ktbf.mayHave(keyA, keyB, timestampbits) {
					k = append(k, keyA, keyB)
					l -= _VALUE_BULK_SET_MSG_ENTRY_HEADER_LENGTH + int64(length)
					if l <= 0 {
						return false
			return true
		// Based on the replica index for the local node, start the scan at
		// different points. For example, in a three replica system the first
		// replica would start scanning at the start, the second a third
		// through, the last would start two thirds through. This is so that
		// pull-replication messages, which are sent concurrently to all other
		// replicas, will get different responses back instead of duplicate
		// items if there is a lot of data to be sent.
		responsibleReplica := ring.ResponsibleReplica(uint32(prm.rangeStart() >> (64 - ring.PartitionBitCount())))
		if responsibleReplica < 0 {
			responsibleReplica = 0
		scanStart := prm.rangeStart() + (prm.rangeStop()-prm.rangeStart())/uint64(ring.ReplicaCount())*uint64(responsibleReplica)
		scanStop := prm.rangeStop()
		store.locmap.ScanCallback(scanStart, scanStop, 0, _TSB_LOCAL_REMOVAL, cutoff, math.MaxUint64, callback)
		// If the scan started part-way through and budget remains, wrap
		// around and scan the portion before the starting point.
		if scanStart != prm.rangeStart() && l > 0 {
			scanStop = scanStart - 1
			scanStart = prm.rangeStart()
			store.locmap.ScanCallback(scanStart, scanStop, 0, _TSB_LOCAL_REMOVAL, cutoff, math.MaxUint64, callback)
		// Capture the sender before the message is handed back for reuse.
		nodeID := prm.nodeID()
		store.pullReplicationState.inFreeMsgChan <- prm
		if len(k) > 0 {
			bsm := store.newOutBulkSetMsg()
			// Indicate that a response to this bulk-set message is not
			// necessary. If the message fails to reach its destination, that
			// destination will simply resend another pull replication message
			// on its next pass.
			binary.BigEndian.PutUint64(bsm.header, 0)
			var t uint64
			var err error
			for i := 0; i < len(k); i += 2 {
				t, v, err = store.read(k[i], k[i+1], v[:0])
				if IsNotFound(err) {
					if t == 0 {
				} else if err != nil {
				if t&_TSB_LOCAL_REMOVAL == 0 {
					if !bsm.add(k[i], k[i+1], t, v) {
					atomic.AddInt32(&store.outBulkSetValues, 1)
			if len(bsm.body) > 0 {
				atomic.AddInt32(&store.outBulkSets, 1)
				store.msgRing.MsgToNode(bsm, nodeID, store.pullReplicationState.inResponseMsgTimeout)
Ejemplo n.º 13
func (o *StoreComms) DeleteValue(ctx context.Context, id []byte) error {
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	return o.DeleteValueTS(ctx, id, timestampMicro)
Ejemplo n.º 14
func GSTests() {
	gsconfigs := make([]GroupClientConfig, *clients)
	var wg sync.WaitGroup
	for w := 0; w < *clients; w++ {
		gsconfigs[w].addr = *gsServer
		gsconfigs[w].id = w
		gsconfigs[w].count = perClient
		gsconfigs[w].value = &value

		gsconfigs[w].wg = &wg
		perGroup := perClient / *groups
		for g := 0; g < *groups; g++ {
			grpA, grpB := murmur3.Sum128([]byte(fmt.Sprintf("group%d-%d", gsconfigs[w].id, g)))
			for k := 0; k < perGroup; k++ {
				tsm := brimtime.TimeToUnixMicro(time.Now())
				wr := &gp.WriteRequest{
					KeyA:           grpA,
					KeyB:           grpB,
					TimestampMicro: tsm,
				wr.ChildKeyA, wr.ChildKeyB = murmur3.Sum128([]byte(fmt.Sprintf("somethingtestkey%d-%d", gsconfigs[w].id, k)))
				rr := &gp.ReadRequest{
					KeyA:      grpA,
					KeyB:      grpB,
					ChildKeyA: wr.ChildKeyA,
					ChildKeyB: wr.ChildKeyB,
				gsconfigs[w].wm = append(gsconfigs[w].wm, wr)
				gsconfigs[w].rm = append(gsconfigs[w].rm, rr)
	log.Println("GroupStore Key/hash generation complete. Spawning tests.")

	if *gsWriteTest {
		t := time.Now()
		for w := 0; w < *clients; w++ {
			if *streamTest {
				go GroupStreamWrite(&gsconfigs[w])
			} else {
				go GroupWrite(&gsconfigs[w])
		log.Println("Issued", *clients*perClient, "GS WRITES")
		ts := time.Since(t).Seconds()
		log.Println("Total run time was:", ts, "seconds")
		log.Printf("Per second: %.2f\n", float64(*clients*perClient)/ts)
	if *gsReadTest {
		t := time.Now()
		for w := 0; w < *clients; w++ {
			if *streamTest {
				go GroupStreamRead(&gsconfigs[w])
			} else {
				go GroupRead(&gsconfigs[w])
		log.Println("Issued", *clients*perClient, "GS READS")
		ts := time.Since(t).Seconds()
		log.Println("Total run time was:", ts, "seconds")
		log.Printf("Per second: %.2f\n", float64(*clients*perClient)/ts)
Ejemplo n.º 15
func (o *StoreComms) DeleteGroupItem(ctx context.Context, key, childKey []byte) error {
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	return o.DeleteGroupItemTS(ctx, key, childKey, timestampMicro)
Ejemplo n.º 16
func (o *StoreComms) WriteGroup(ctx context.Context, key, childKey, value []byte) error {
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	return o.WriteGroupTS(ctx, key, childKey, value, timestampMicro)
// tombstoneDiscardPassExpiredDeletions scans for entries marked with
// _TSB_DELETION (but not _TSB_LOCAL_REMOVAL) that are older than the maximum
// tombstone age and marks them for _TSB_LOCAL_REMOVAL.
//
// It returns the notification received on notifyChan (after raising the abort
// flag) or nil once waitChan fires.
//
// NOTE(review): this listing has no closing braces and several statements
// appear to be missing (for example nothing visible advances
// localRemovalsIndex or partition, and the abort/termination checks have
// empty bodies). It looks like lines were lost during extraction; restore
// them from the upstream source before compiling.
func (store *defaultGroupStore) tombstoneDiscardPassExpiredDeletions(notifyChan chan *bgNotification) *bgNotification {
	// Each worker will perform a pass on a subsection of each partition's key
	// space. Additionally, each worker will start their work on different
	// partition. This reduces contention for a given section of the locmap.
	partitionShift := uint16(0)
	partitionMax := uint64(0)
	if store.msgRing != nil {
		pbc := store.msgRing.Ring().PartitionBitCount()
		partitionShift = 64 - pbc
		partitionMax = (uint64(1) << pbc) - 1
	workerMax := uint64(store.workers - 1)
	// Size of the key range each worker covers within one partition.
	workerPartitionPiece := (uint64(1) << partitionShift) / (workerMax + 1)
	// work processes one worker's slice of one partition.
	work := func(partition uint64, worker uint64, localRemovals []groupLocalRemovalEntry) {
		partitionOnLeftBits := partition << partitionShift
		rangeBegin := partitionOnLeftBits + (workerPartitionPiece * worker)
		var rangeEnd uint64
		// A little bit of complexity here to handle where the more general
		// expressions would have overflow issues.
		if worker != workerMax {
			rangeEnd = partitionOnLeftBits + (workerPartitionPiece * (worker + 1)) - 1
		} else {
			if partition != partitionMax {
				rangeEnd = ((partition + 1) << partitionShift) - 1
			} else {
				rangeEnd = math.MaxUint64
		cutoff := (uint64(brimtime.TimeToUnixMicro(time.Now())) << _TSB_UTIL_BITS) - store.tombstoneDiscardState.age
		more := true
		for more {
			localRemovalsIndex := 0
			// Since we shouldn't try to modify what we're scanning while we're
			// scanning (lock contention) we instead record in localRemovals
			// what to modify after the scan.
			rangeBegin, more = store.locmap.ScanCallback(rangeBegin, rangeEnd, _TSB_DELETION, _TSB_LOCAL_REMOVAL, cutoff, uint64(store.tombstoneDiscardState.batchSize), func(keyA uint64, keyB uint64, childKeyA uint64, childKeyB uint64, timestampbits uint64, length uint32) bool {
				e := &localRemovals[localRemovalsIndex]
				e.keyA = keyA
				e.keyB = keyB

				e.childKeyA = childKeyA
				e.childKeyB = childKeyB

				e.timestampbits = timestampbits
				return true
			atomic.AddInt32(&store.expiredDeletions, int32(localRemovalsIndex))
			for i := 0; i < localRemovalsIndex; i++ {
				e := &localRemovals[i]
				// These writes go through the entire system, so they're
				// persisted and therefore restored on restarts.
				store.write(e.keyA, e.keyB, e.childKeyA, e.childKeyB, e.timestampbits|_TSB_LOCAL_REMOVAL, nil, true)
	// To avoid memory churn, the localRemovals scratchpads are allocated just
	// once and passed in to the workers.
	for len(store.tombstoneDiscardState.localRemovals) <= int(workerMax) {
		store.tombstoneDiscardState.localRemovals = append(store.tombstoneDiscardState.localRemovals, make([]groupLocalRemovalEntry, store.tombstoneDiscardState.batchSize))
	var abort uint32
	wg := &sync.WaitGroup{}
	wg.Add(int(workerMax + 1))
	for worker := uint64(0); worker <= workerMax; worker++ {
		go func(worker uint64) {
			localRemovals := store.tombstoneDiscardState.localRemovals[worker]
			// Each worker starts at a different partition and wraps back
			// to 0 after passing partitionMax.
			partitionBegin := (partitionMax + 1) / (workerMax + 1) * worker
			for partition := partitionBegin; ; {
				if atomic.LoadUint32(&abort) != 0 {
				work(partition, worker, localRemovals)
				if partition > partitionMax {
					partition = 0
				if partition == partitionBegin {
	waitChan := make(chan struct{}, 1)
	go func() {
	select {
	case notification := <-notifyChan:
		// Signal the workers to stop and hand the notification back.
		atomic.AddUint32(&abort, 1)
		return notification
	case <-waitChan:
		return nil
Ejemplo n.º 18
func (c *Client) parseGroupCmd(line string) (string, error) {
	if c.gstore == nil {
		err := c.getGroupClient()
		if err != nil {
			return "", err
	split := strings.SplitN(line, " ", 2)
	cmd := split[0]
	if len(split) != 2 {
		if cmd == "exit" {
			return "", fmt.Errorf("Exiting..")
		if cmd == "help" {
			return c.printHelp(), nil
		return c.printHelp(), nil
	args := split[1]
	switch cmd {
	case "write":
		sarg := strings.SplitN(args, " ", 3)
		if len(sarg) < 3 {
			return fmt.Sprintf("write needs groupkey, key, value: `write groupkey somekey some value thing here`"), nil
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, childKeyB := murmur3.Sum128([]byte(sarg[1]))
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.gstore.Write(context.Background(), keyA, keyB, childKeyA, childKeyB, timestampMicro, []byte(sarg[2]))
		if err != nil {
			return "", err
		return fmt.Sprintf("WRITE TIMESTAMPMICRO: %d\nPREVIOUS TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "write-hash":
		sarg := strings.SplitN(args, " ", 4)
		if len(sarg) < 4 {
			return fmt.Sprintf("write-hash needs groupkey, keyahash keybhash, value: `write-hash groupkey 19191919 19191919 some value thing here`"), nil
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, err := strconv.ParseUint(sarg[1], 10, 64)
		if err != nil {
			return "", err
		childKeyB, err := strconv.ParseUint(sarg[2], 10, 64)
		if err != nil {
			return "", err
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.gstore.Write(context.Background(), keyA, keyB, childKeyA, childKeyB, timestampMicro, []byte(sarg[3]))
		if err != nil {
			return "", err
		return fmt.Sprintf("WRITE TIMESTAMPMICRO: %d\n PREVIOUS TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "read":
		sarg := strings.SplitN(args, " ", 2)
		if len(sarg) < 2 {
			return fmt.Sprintf("read needs groupkey, subkey"), nil
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, childKeyB := murmur3.Sum128([]byte(sarg[1]))
		timestampMicro, value, err := c.gstore.Read(context.Background(), keyA, keyB, childKeyA, childKeyB, nil)
		if store.IsNotFound(err) {
			return fmt.Sprintf("not found"), nil
		} else if err != nil {
			return "", err
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nVALUE: %s", timestampMicro, value), nil
	case "read-hash":
		sarg := strings.SplitN(args, " ", 3)
		if len(sarg) < 3 {
			return fmt.Sprintf("read needs groupkey, subkeyA, subkeyB"), nil
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, err := strconv.ParseUint(sarg[1], 10, 64)
		if err != nil {
			return "", err
		childKeyB, err := strconv.ParseUint(sarg[2], 10, 64)
		if err != nil {
			return "", err
		timestampMicro, value, err := c.gstore.Read(context.Background(), keyA, keyB, childKeyA, childKeyB, nil)
		if store.IsNotFound(err) {
			return fmt.Sprintf("not found"), nil
		} else if err != nil {
			return "", err
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nVALUE: %s", timestampMicro, value), nil
	case "read-group":
		KeyA, KeyB := murmur3.Sum128([]byte(args))
		items, err := c.gstore.ReadGroup(context.Background(), KeyA, KeyB)
		if store.IsNotFound(err) {
			return fmt.Sprintf("not found"), nil
		} else if err != nil {
			return "", err
		keys := make([]string, len(items))
		for k, v := range items {
			keys[k] = fmt.Sprintf("TIMESTAMPMICRO: %d [ %d | %d] VALUE: %s", v.TimestampMicro, v.ChildKeyA, v.ChildKeyB, v.Value)
		return fmt.Sprintf(strings.Join(keys, "\n")), nil
	case "delete":
		sarg := strings.SplitN(args, " ", 2)
		if len(sarg) < 2 {
			return fmt.Sprintf("delete needs groupkey, subkey"), nil
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, childKeyB := murmur3.Sum128([]byte(sarg[1]))
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.gstore.Delete(context.Background(), keyA, keyB, childKeyA, childKeyB, timestampMicro)
		if err != nil {
			return "", err
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nOLD TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "lookup":
		sarg := strings.SplitN(args, " ", 2)
		if len(sarg) < 2 {
			return fmt.Sprintf("lookup needs groupkey, subkey"), nil
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, childKeyB := murmur3.Sum128([]byte(sarg[1]))
		timestampMicro, length, err := c.gstore.Lookup(context.Background(), keyA, keyB, childKeyA, childKeyB)
		if store.IsNotFound(err) {
			return fmt.Sprintf("not found"), nil
		} else if err != nil {
			return "", err
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nLENGTH: %d", timestampMicro, length), nil
	case "lookup-group":
		keyA, keyB := murmur3.Sum128([]byte(args))
		items, err := c.gstore.LookupGroup(context.Background(), keyA, keyB)
		if store.IsNotFound(err) {
			return fmt.Sprintf("not found"), nil
		} else if err != nil {
			return "", err
		keys := make([]string, len(items))
		for k, v := range items {
			keys[k] = fmt.Sprintf("TIMESTAMPMICRO: %d [ %d | %d ]", v.TimestampMicro, v.ChildKeyA, v.ChildKeyB)
		return fmt.Sprintf(strings.Join(keys, "\n")), nil
	case "mode":
		if args == "value" {
			c.gmode = false
			return fmt.Sprintf("Switched to value mode"), nil
		if args == "group" {
			return fmt.Sprintf("Already in group store mode"), nil
		return fmt.Sprintf("Valid modes are: value | group"), nil
	case "exit":
		return "", fmt.Errorf("Exiting..")
	return c.printHelp(), nil
Ejemplo n.º 19
// CreateFS creates a file system record for the account named by r.Acctnum.
//
// It loads the account with s.getAcct (an error there is returned unchanged),
// rejects the request via errf with codes.PermissionDenied when r.Token does
// not match the account's stored token, and runs s.dupNameCheck for r.FSName
// (a failure there is returned via errf with codes.FailedPrecondition).
//
// It then writes two entries:
//   - the JSON-encoded FileSysPayLoad under "/acct/<Acctnum>/fs", keyed by a
//     newly generated UUID, through s.fsws.writeGStore;
//   - a JSON-encoded AcctRefPayload under "/fs", keyed by the same UUID,
//     through s.fsws.gstore.Write.
//
// Marshal and write failures are returned via errf with codes.Internal. On
// success the response carries Status "OK" and a Payload message naming the
// new file system ID and the account number.
//
// NOTE(review): this listing appears to have lost lines: no closing braces are
// present and the `if ok {` below has no visible body. Restore it from the
// upstream source before compiling.
func (s *FileSystemAPIServer) CreateFS(ctx context.Context, r *fb.CreateFSRequest) (*fb.CreateFSResponse, error) {
	var status string
	var result string
	var acctData AcctPayLoad
	var newFS FileSysPayLoad
	var acctRef AcctRefPayload
	var acctRefB []byte
	var err error
	var dataB []byte
	// Get incoming IP.
	// NOTE(review): the statement that used pr inside this `if` is not visible
	// here, so what is done with the peer address cannot be confirmed.
	pr, ok := peer.FromContext(ctx)
	if ok {
	// Load the account data for the requested account number.
	acctData, err = s.getAcct("/acct", r.Acctnum)
	if err != nil {
		log.Printf("Error %v on lookup for account %s", err, r.Acctnum)
		return nil, err
	// Validate the token against the one stored on the account.
	if acctData.Token != r.Token {
		return nil, errf(codes.PermissionDenied, "%s", "Invalid Token")
	// Check to see if the file system name already exists.
	// NOTE(review): this path is built from acctData.ID while the write below
	// uses r.Acctnum; confirm the two are always the same value.
	fs := fmt.Sprintf("/acct/%s/fs", acctData.ID)
	err = s.dupNameCheck(fs, r.FSName)
	if err != nil {
		log.Printf("Precondition Failed: %v\n...", err)
		return nil, errf(codes.FailedPrecondition, "%v", err)
	// File system values. The new UUID doubles as the child key of the write
	// and as the file system's ID.
	parentKey := fmt.Sprintf("/acct/%s/fs", r.Acctnum)
	childKey := uuid.NewV4().String()
	newFS.ID = childKey
	newFS.AcctID = r.Acctnum
	newFS.Name = r.FSName
	// 107374182400 = 100 * 2^30 bytes.
	newFS.SizeInBytes = 107374182400
	newFS.Status = "active"
	newFS.CreateDate = time.Now().Unix()
	newFS.DeleteDate = 0
	// Write the file system entry.
	dataB, err = json.Marshal(newFS)
	if err != nil {
		log.Printf("Marshal Error: %v\n...", err)
		return nil, errf(codes.Internal, "%v", err)
	_, err = s.fsws.writeGStore(parentKey, childKey, dataB)
	if err != nil {
		log.Printf("Write Error: %v", err)
		return nil, errf(codes.Internal, "%v", err)
	// Write the special file system lookup entry:
	// "/fs"	"[file system uuid]"		{"id": "[filesystem uuid]", "acctid": "[account uuid]"}
	parentKeyA, parentKeyB := murmur3.Sum128([]byte("/fs"))
	childKeyA, childKeyB := murmur3.Sum128([]byte(newFS.ID))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	acctRef.ID = newFS.ID
	acctRef.AcctID = r.Acctnum
	acctRefB, err = json.Marshal(acctRef)
	if err != nil {
		log.Printf("Marshal Error: %v\n...", err)
		return nil, errf(codes.Internal, "%v", err)
	// This write uses context.Background(), not the request's ctx.
	_, err = s.fsws.gstore.Write(context.Background(), parentKeyA, parentKeyB, childKeyA, childKeyB, timestampMicro, acctRefB)
	if err != nil {
		log.Printf("Write Error: %v", err)
		return nil, errf(codes.Internal, "%v", err)
	// Prepare the results to return.
	status = "OK"
	result = fmt.Sprintf("File System %s was created from Account %s", childKey, r.Acctnum)
	return &fb.CreateFSResponse{Payload: result, Status: status}, nil
Ejemplo n.º 20
func (c *Client) parseValueCmd(line string) (string, error) {
	if c.vconn == nil {
		err := c.getValueClient()
		if err != nil {
			return "", err
	split := strings.SplitN(line, " ", 2)
	cmd := split[0]
	if len(split) != 2 {
		if cmd == "exit" {
			return "", fmt.Errorf("Exiting..")
		if cmd == "help" {
			return c.printHelp(), nil
		return c.printHelp(), nil
	args := split[1]
	switch cmd {
	case "write":
		sarg := strings.SplitN(args, " ", 2)
		if len(sarg) < 2 {
			return fmt.Sprintf("write needs key and value: `write somekey some value thing here`"), nil
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		value := []byte(sarg[1])
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.vstore.Write(context.Background(), keyA, keyB, timestampMicro, value)
		if err != nil {
			return "", err
		return fmt.Sprintf("WRITE TIMESTAMPMICRO: %d\nPREVIOUS TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "read":
		keyA, keyB := murmur3.Sum128([]byte(args))
		timestampMicro, value, err := c.vstore.Read(context.Background(), keyA, keyB, nil)
		if store.IsNotFound(err) {
			return fmt.Sprintf("not found"), nil
		} else if err != nil {
			return "", err
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nVALUE: %s", timestampMicro, value), nil
	case "delete":
		keyA, keyB := murmur3.Sum128([]byte(args))
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.vstore.Delete(context.Background(), keyA, keyB, timestampMicro)
		if err != nil {
			return "", err
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nOLD TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "lookup":
		keyA, keyB := murmur3.Sum128([]byte(args))
		timestampMicro, length, err := c.vstore.Lookup(context.Background(), keyA, keyB)
		if store.IsNotFound(err) {
			return fmt.Sprintf("not found"), nil
		} else if err != nil {
			return "", err
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nLENGTH: %d", timestampMicro, length), nil
	case "mode":
		if args == "value" {
			return fmt.Sprintf("Already in value store mode"), nil
		if args == "group" {
			c.gmode = true
			return fmt.Sprintf("Switched to group mode"), nil
		return fmt.Sprintf("Valid modes are: value | group"), nil
	case "exit":
		return "", fmt.Errorf("Exiting..")
	return c.printHelp(), nil
Ejemplo n.º 21
// CreateFS ...
func (s *FileSystemAPIServer) CreateFS(ctx context.Context, r *pb.CreateFSRequest) (*pb.CreateFSResponse, error) {
	var err error
	var acctID string
	srcAddr := ""
	var fsRef FileSysRef
	var fsRefByte []byte
	var fsSysAttr FileSysAttr
	var fsSysAttrByte []byte

	// Get incomming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		srcAddr = pr.Addr.String()

	// Validate Token
	acctID, err = s.validateToken(r.Token)
	if err != nil {
		log.Printf("%s CREATE FAILED %s\n", srcAddr, "PermissionDenied")
		return nil, errf(codes.PermissionDenied, "%v", "Invalid Token")

	fsID := uuid.NewV4().String()
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	// Write file system reference entries.
	// write /fs 								FSID						FileSysRef
	pKey := "/fs"
	pKeyA, pKeyB := murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB := murmur3.Sum128([]byte(fsID))
	fsRef.AcctID = acctID
	fsRef.FSID = fsID
	fsRefByte, err = json.Marshal(fsRef)
	if err != nil {
		log.Printf("%s  CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	_, err = s.gstore.Write(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro, fsRefByte)
	if err != nil {
		log.Printf("%s CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	// write /acct/acctID				FSID						FileSysRef
	pKey = fmt.Sprintf("/acct/%s", acctID)
	pKeyA, pKeyB = murmur3.Sum128([]byte(pKey))
	_, err = s.gstore.Write(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro, fsRefByte)
	if err != nil {
		log.Printf("%s CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	// Write file system attributes
	// write /fs/FSID						name						FileSysAttr
	pKey = fmt.Sprintf("/fs/%s", fsID)
	pKeyA, pKeyB = murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB = murmur3.Sum128([]byte("name"))
	fsSysAttr.Attr = "name"
	fsSysAttr.Value = r.FSName
	fsSysAttr.FSID = fsID
	fsSysAttrByte, err = json.Marshal(fsSysAttr)
	if err != nil {
		log.Printf("%s  CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	_, err = s.gstore.Write(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro, fsSysAttrByte)
	if err != nil {
		log.Printf("%s CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)

	// Return File System UUID
	// Log Operation
	log.Printf("%s CREATE SUCCESS %s\n", srcAddr, fsID)
	return &pb.CreateFSResponse{Data: fsID}, nil
// pushReplicationPass performs a single push-replication pass over the
// store's local entries.
//
// It returns nil straight away when store.msgRing, or the ring it currently
// reports, is nil. Otherwise it launches one goroutine per configured worker
// (store.pushReplicationState.workers), each of which walks the partitions
// and calls the work closure for partitions where ring.Responsible reports
// false. The function then waits on notifyChan and waitChan: a notification
// received on notifyChan sets the shared abort flag and is returned to the
// caller; a receive on waitChan returns nil.
//
// NOTE(review): this listing appears to have lost lines. No closing braces
// are present, and several `if` blocks, the partition loop and the goroutine
// that feeds waitChan have no visible body. The comments below describe only
// what is visible; restore the function from upstream before relying on it.
func (store *defaultGroupStore) pushReplicationPass(notifyChan chan *bgNotification) *bgNotification {
	if store.msgRing == nil {
		return nil
	begin := time.Now()
	// Log how long the pass took when the function returns.
	defer func() {
		store.logDebug("pushReplication: pass took %s", time.Now().Sub(begin))
	ring := store.msgRing.Ring()
	if ring == nil {
		return nil
	// Remember the ring version so workers can detect a ring change mid-pass.
	ringVersion := ring.Version()
	pbc := ring.PartitionBitCount()
	// The partition number occupies the top pbc bits of a 64-bit key, so
	// shifting a partition left by partitionShift gives the first key of
	// that partition.
	partitionShift := uint64(64 - pbc)
	partitionMax := (uint64(1) << pbc) - 1
	workerMax := uint64(store.pushReplicationState.workers - 1)
	// Size of the slice of each partition's key range handled by one worker.
	workerPartitionPiece := (uint64(1) << partitionShift) / (workerMax + 1)
	// To avoid memory churn, the scratchpad areas are allocated just once and
	// passed in to the workers.
	// Grow the per-worker key lists and value buffers until there is one of
	// each per worker.
	for len(store.pushReplicationState.lists) < int(workerMax+1) {
		store.pushReplicationState.lists = append(store.pushReplicationState.lists, make([]uint64, store.bulkSetState.msgCap/_GROUP_BULK_SET_MSG_MIN_ENTRY_LENGTH*4))
	for len(store.pushReplicationState.valBufs) < int(workerMax+1) {
		store.pushReplicationState.valBufs = append(store.pushReplicationState.valBufs, make([]byte, store.valueCap))
	// abort is set to non-zero (via atomic add) when a notification arrives;
	// workers poll it with atomic loads.
	var abort uint32
	// work handles one worker's piece of one partition: it gathers matching
	// keys from the location map, then builds a bulk-set message from them
	// and sends it to the other replicas.
	work := func(partition uint64, worker uint64, list []uint64, valbuf []byte) {
		partitionOnLeftBits := partition << partitionShift
		rangeBegin := partitionOnLeftBits + (workerPartitionPiece * worker)
		var rangeEnd uint64
		// A little bit of complexity here to handle where the more general
		// expressions would have overflow issues.
		// The last worker takes everything up to the end of the partition,
		// and the last partition ends at math.MaxUint64.
		if worker != workerMax {
			rangeEnd = partitionOnLeftBits + (workerPartitionPiece * (worker + 1)) - 1
		} else {
			if partition != partitionMax {
				rangeEnd = ((partition + 1) << partitionShift) - 1
			} else {
				rangeEnd = math.MaxUint64
		// Current time in timestamp-bits form (microseconds shifted left by
		// _TSB_UTIL_BITS); both cutoffs are offsets back from it.
		timestampbitsNow := uint64(brimtime.TimeToUnixMicro(time.Now())) << _TSB_UTIL_BITS
		cutoff := timestampbitsNow - store.replicationIgnoreRecent
		tombstoneCutoff := timestampbitsNow - store.tombstoneDiscardState.age
		// Byte budget for the outgoing message, decremented per gathered key.
		availableBytes := int64(store.bulkSetState.msgCap)
		// Reuse the scratch list's backing array.
		list = list[:0]
		// We ignore the "more" option from ScanCallback and just send the
		// first matching batch each full iteration. Once a remote end acks the
		// batch, those keys will have been removed and the first matching
		// batch will start with any remaining keys.
		// First we gather the matching keys to send.
		store.locmap.ScanCallback(rangeBegin, rangeEnd, 0, _TSB_LOCAL_REMOVAL, cutoff, math.MaxUint64, func(keyA uint64, keyB uint64, childKeyA uint64, childKeyB uint64, timestampbits uint64, length uint32) bool {
			inMsgLength := _GROUP_BULK_SET_MSG_ENTRY_HEADER_LENGTH + int64(length)
			// Keep entries that are not deletions, or deletions whose
			// timestamp is at or after tombstoneCutoff.
			if timestampbits&_TSB_DELETION == 0 || timestampbits >= tombstoneCutoff {
				// Each gathered entry takes four slots in list.
				list = append(list, keyA, keyB, childKeyA, childKeyB)
				availableBytes -= inMsgLength
				// Stop scanning once the remaining budget is smaller than
				// the entry just added.
				if availableBytes < inMsgLength {
					return false
			return true
		// NOTE(review): the bodies of the next two checks (nothing gathered
		// or abort requested; ring gone or ring version changed) are not
		// visible in this listing.
		if len(list) <= 0 || atomic.LoadUint32(&abort) != 0 {
		ring2 := store.msgRing.Ring()
		if ring2 == nil || ring2.Version() != ringVersion {
		// Then we build and send the actual message.
		bsm := store.newOutBulkSetMsg()
		var timestampbits uint64
		var err error
		// Walk the gathered keys four slots at a time, re-reading each entry
		// into valbuf.
		for i := 0; i < len(list); i += 4 {
			timestampbits, valbuf, err = store.read(list[i], list[i+1], list[i+2], list[i+3], valbuf[:0])
			// This might mean we need to send a deletion or it might mean the
			// key has been completely removed from our records
			// (timestampbits==0).
			// NOTE(review): the bodies of these error branches are not
			// visible in this listing.
			if IsNotFound(err) {
				if timestampbits == 0 {
			} else if err != nil {
			// Re-check the freshly read timestamp bits: not locally removed,
			// older than cutoff, and not a deletion older than
			// tombstoneCutoff.
			if timestampbits&_TSB_LOCAL_REMOVAL == 0 && timestampbits < cutoff && (timestampbits&_TSB_DELETION == 0 || timestampbits >= tombstoneCutoff) {
				// NOTE(review): what happens when bsm.add returns false is
				// not visible in this listing.
				if !bsm.add(list[i], list[i+1], list[i+2], list[i+3], timestampbits, valbuf) {
				atomic.AddInt32(&store.outBulkSetPushValues, 1)
		atomic.AddInt32(&store.outBulkSetPushes, 1)
		store.msgRing.MsgToOtherReplicas(bsm, uint32(partition), store.pushReplicationState.msgTimeout)
	wg := &sync.WaitGroup{}
	wg.Add(int(workerMax + 1))
	for worker := uint64(0); worker <= workerMax; worker++ {
		go func(worker uint64) {
			// Each worker uses its own scratch list and value buffer.
			list := store.pushReplicationState.lists[worker]
			valbuf := store.pushReplicationState.valBufs[worker]
			// Workers start at different partitions, spread evenly across
			// the partition space.
			partitionBegin := (partitionMax + 1) / (workerMax + 1) * worker
			// NOTE(review): the loop's increment and the bodies of the
			// abort, ring-change and wrap-around-complete checks are not
			// visible in this listing.
			for partition := partitionBegin; ; {
				if atomic.LoadUint32(&abort) != 0 {
				ring2 := store.msgRing.Ring()
				if ring2 == nil || ring2.Version() != ringVersion {
				// Only partitions for which ring.Responsible reports false
				// are pushed.
				if !ring.Responsible(uint32(partition)) {
					work(partition, worker, list, valbuf)
				// Wrap around to partition 0 after the last partition.
				if partition > partitionMax {
					partition = 0
				if partition == partitionBegin {
	waitChan := make(chan struct{}, 1)
	// NOTE(review): the body of this goroutine is not visible in this
	// listing; presumably it waits on wg and then signals waitChan (confirm
	// against upstream).
	go func() {
	select {
	case notification := <-notifyChan:
		// Tell the workers to stop, then hand the notification back.
		atomic.AddUint32(&abort, 1)
		return notification
	case <-waitChan:
		return nil