func GroupWrite(c *GroupClientConfig) {
	defer c.wg.Done()
	var opts []grpc.DialOption
	creds := credentials.NewTLS(&tls.Config{
		InsecureSkipVerify: true,
	})
	opts = append(opts, grpc.WithTransportCredentials(creds))
	conn, err := grpc.Dial(c.addr, opts...)
	if err != nil {
		log.Fatalf("Failed to dial server: %s", err)
	}
	defer conn.Close()
	client := gp.NewGroupStoreClient(conn)
	empty := []byte("")
	for i := range c.wm {
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		c.wm[i].Value = *c.value
		// Stamp the request actually being sent so the comparison below
		// reflects what went over the wire.
		c.wm[i].TimestampMicro = brimtime.TimeToUnixMicro(time.Now())
		res, err := client.Write(ctx, c.wm[i])
		cancel()
		if err != nil {
			log.Println("Client", c.id, ":", err)
			c.wm[i].Value = empty
			continue
		}
		if res.TimestampMicro > c.wm[i].TimestampMicro {
			log.Printf("TSM is newer than attempted, Key %d-%d Got %s, Sent: %s", c.id, i, brimtime.UnixMicroToTime(res.TimestampMicro), brimtime.UnixMicroToTime(c.wm[i].TimestampMicro))
		}
		c.wm[i].Value = empty
	}
}
// RevokeAddrFS ...
func (s *FileSystemAPIServer) RevokeAddrFS(ctx context.Context, r *pb.RevokeAddrFSRequest) (*pb.RevokeAddrFSResponse, error) {
	var err error
	srcAddr := ""

	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		srcAddr = pr.Addr.String()
	}
	// Validate Token
	_, err = s.validateToken(r.Token)
	if err != nil {
		log.Printf("%s REVOKE FAILED %s\n", srcAddr, "PermissionDenied")
		return nil, errf(codes.PermissionDenied, "%v", "Invalid Token")
	}
	// REVOKE a file system entry for the addr
	//     delete /fs/FSID/addr    addr    AddrRef
	pKey := fmt.Sprintf("/fs/%s/addr", r.FSid)
	pKeyA, pKeyB := murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB := murmur3.Sum128([]byte(r.Addr))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	_, err = s.gstore.Delete(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro)
	if store.IsNotFound(err) {
		log.Printf("%s REVOKE FAILED %s %s\n", srcAddr, r.FSid, r.Addr)
		return nil, errf(codes.NotFound, "%v", "Not Found")
	}
	if err != nil {
		log.Printf("%s REVOKE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}

	// Addr was revoked; log the operation
	log.Printf("%s REVOKE SUCCESS %s %s\n", srcAddr, r.FSid, r.Addr)
	return &pb.RevokeAddrFSResponse{Data: r.FSid}, nil
}
// RevokeAddrFS ...
func (s *FileSystemAPIServer) RevokeAddrFS(ctx context.Context, r *pb.RevokeAddrFSRequest) (*pb.RevokeAddrFSResponse, error) {
	var err error
	var acctID string
	var value []byte
	var fsRef FileSysRef
	srcAddr := ""

	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		srcAddr = pr.Addr.String()
	}
	// Validate Token
	acctID, err = s.validateToken(r.Token)
	if err != nil {
		log.Printf("%s REVOKE FAILED %s\n", srcAddr, "PermissionDenied")
		return nil, errf(codes.PermissionDenied, "%v", "Invalid Token")
	}
	// Validate that the token's account owns this file system.
	// Read the FileSysRef entry to determine whether it exists.
	pKey := "/fs"
	pKeyA, pKeyB := murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB := murmur3.Sum128([]byte(r.FSid))
	_, value, err = s.gstore.Read(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, nil)
	if store.IsNotFound(err) {
		log.Printf("%s REVOKE FAILED %s NOTFOUND\n", srcAddr, r.FSid)
		return nil, errf(codes.NotFound, "%v", "Not Found")
	}
	if err != nil {
		log.Printf("%s REVOKE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}
	err = json.Unmarshal(value, &fsRef)
	if err != nil {
		log.Printf("%s REVOKE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}
	if fsRef.AcctID != acctID {
		log.Printf("%s REVOKE FAILED %s ACCOUNT MISMATCH\n", srcAddr, r.FSid)
		return nil, errf(codes.FailedPrecondition, "%v", "Account Mismatch")
	}
	// REVOKE a file system entry for the addr
	//     delete /fs/FSID/addr    addr    AddrRef
	pKey = fmt.Sprintf("/fs/%s/addr", r.FSid)
	pKeyA, pKeyB = murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB = murmur3.Sum128([]byte(r.Addr))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	_, err = s.gstore.Delete(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro)
	if store.IsNotFound(err) {
		log.Printf("%s REVOKE FAILED %s %s\n", srcAddr, r.FSid, r.Addr)
		return nil, errf(codes.NotFound, "%v", "Not Found")
	}
	if err != nil {
		log.Printf("%s REVOKE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}

	// Addr was revoked; log the operation
	log.Printf("%s REVOKE SUCCESS %s %s\n", srcAddr, r.FSid, r.Addr)
	return &pb.RevokeAddrFSResponse{Data: r.FSid}, nil
}
func VSTests() {
	vsconfigs := make([]ValueClientConfig, *clients)
	var wg sync.WaitGroup
	for w := 0; w < *clients; w++ {
		vsconfigs[w].addr = *vsServer
		vsconfigs[w].id = w
		vsconfigs[w].count = perClient
		vsconfigs[w].value = &value
		vsconfigs[w].wg = &wg
		vsconfigs[w].wm = make([]*vp.WriteRequest, perClient)
		vsconfigs[w].rm = make([]*vp.ReadRequest, perClient)
		for k := 0; k < perClient; k++ {
			vsconfigs[w].wm[k] = &vp.WriteRequest{}
			vsconfigs[w].rm[k] = &vp.ReadRequest{}
			vsconfigs[w].wm[k].KeyA, vsconfigs[w].wm[k].KeyB = murmur3.Sum128([]byte(fmt.Sprintf("somethingtestkey%d-%d", vsconfigs[w].id, k)))
			vsconfigs[w].wm[k].TimestampMicro = brimtime.TimeToUnixMicro(time.Now())
			vsconfigs[w].rm[k].KeyA = vsconfigs[w].wm[k].KeyA
			vsconfigs[w].rm[k].KeyB = vsconfigs[w].wm[k].KeyB
		}
	}
	log.Println("ValueStore Key/hash generation complete. Spawning tests.")
	// ValueStore Tests
	if *vsWriteTest {
		t := time.Now()
		for w := 0; w < *clients; w++ {
			wg.Add(1)
			if *streamTest {
				go ValueStreamWrite(&vsconfigs[w])
			} else {
				go ValueWrite(&vsconfigs[w])
			}
		}
		wg.Wait()
		log.Println("Issued", *clients*perClient, "VS WRITES")
		ts := time.Since(t).Seconds()
		log.Println("Total run time was:", ts, "seconds")
		log.Printf("Per second: %.2f\n", float64(*clients*perClient)/ts)
	}
	if *vsReadTest {
		t := time.Now()
		for w := 0; w < *clients; w++ {
			wg.Add(1)
			if *streamTest {
				go ValueStreamRead(&vsconfigs[w])
			} else {
				go ValueRead(&vsconfigs[w])
			}
		}
		wg.Wait()
		log.Println("Issued", *clients*perClient, "VS READS")
		ts := time.Since(t).Seconds()
		log.Println("Total run time was:", ts, "seconds")
		log.Printf("Per second: %.2f\n", float64(*clients*perClient)/ts)
	}
}
// GrantAddrFS ...
func (s *FileSystemAPIServer) GrantAddrFS(ctx context.Context, r *fb.GrantAddrFSRequest) (*fb.GrantAddrFSResponse, error) {
	var status string
	var err error
	var acctData AcctPayLoad
	var fsData FileSysPayLoad
	var addrData AddrPayLoad
	var dataB []byte

	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		fmt.Println(pr.Addr)
	}
	// getAcct data
	acctData, err = s.getAcct("/acct", r.Acctnum)
	if err != nil {
		log.Printf("Error %v on lookup for account %s", err, r.Acctnum)
		return nil, err
	}
	// validate token
	if acctData.Token != r.Token {
		return nil, errf(codes.PermissionDenied, "%s", "Invalid Token")
	}
	// getFS data
	fs := fmt.Sprintf("/acct/%s/fs", r.Acctnum)
	fsData, err = s.getFS(fs, r.FSid)
	if err != nil {
		log.Printf("Error %v on lookup for file system %s", err, r.FSid)
		return nil, err
	}
	if fsData.Status == "active" {
		log.Println("FileSystem is active")
	}
	// write out the ip address
	parentKey := fmt.Sprintf("/fs/%s/addr", r.FSid)
	childKey := r.Addr
	parentKeyA, parentKeyB := murmur3.Sum128([]byte(parentKey))
	childKeyA, childKeyB := murmur3.Sum128([]byte(childKey))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	addrData.Addr = r.Addr
	dataB, err = json.Marshal(addrData)
	if err != nil {
		log.Printf("Marshal Error: %v\n", err)
		return nil, errf(codes.Internal, "%v", err)
	}
	_, err = s.fsws.gstore.Write(context.Background(), parentKeyA, parentKeyB, childKeyA, childKeyB, timestampMicro, dataB)
	if err != nil {
		log.Printf("Write Error: %v", err)
		return nil, errf(codes.Internal, "%v", err)
	}
	// Report success
	status = fmt.Sprintf("addr %s for filesystem %s with account id %s was granted", r.Addr, r.FSid, r.Acctnum)
	return &fb.GrantAddrFSResponse{Status: status}, nil
}
// WriteValue writes data under the murmur3 hash of id, stamped with the
// current time in microseconds. If the store already held an entry at least
// as new, ErrStoreHasNewerValue is returned.
func (o *StoreComms) WriteValue(ctx context.Context, id, data []byte) error {
	keyA, keyB := murmur3.Sum128(id)
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	oldTimestampMicro, err := o.vstore.Write(ctx, keyA, keyB, timestampMicro, data)
	if err != nil {
		return err
	}
	if oldTimestampMicro >= timestampMicro {
		return ErrStoreHasNewerValue
	}
	return nil
}
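// A minimal standalone sketch (not part of the source, names are
// illustrative) of the last-write-wins check WriteValue relies on: the store
// returns the timestamp it previously held, and a previous timestamp at or
// above ours means our write lost the race.
//
//	package main
//
//	import "fmt"
//
//	// writeWins reports whether a write stamped newMicro supersedes a stored
//	// value stamped oldMicro, mirroring WriteValue's comparison.
//	func writeWins(oldMicro, newMicro int64) bool {
//		return oldMicro < newMicro
//	}
//
//	func main() {
//		fmt.Println(writeWins(1000, 2000)) // true: our write is newer
//		fmt.Println(writeWins(2000, 2000)) // false: the ErrStoreHasNewerValue case
//	}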
// writeGStore ...
func (fsws *FileSystemWS) writeGStore(g string, m string, p []byte) (string, error) {
	// prepare groupVal and memberVal
	log.Println("Starting a Write to the Group Store")
	keyA, keyB := murmur3.Sum128([]byte(g))
	childKeyA, childKeyB := murmur3.Sum128([]byte(m))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	newTimestampMicro, err := fsws.gstore.Write(context.Background(), keyA, keyB, childKeyA, childKeyB, timestampMicro, p)
	if err != nil {
		return "", err
	}
	log.Println("Successfully wrote something to the Group Store")
	return fmt.Sprintf("TSM: %d", newTimestampMicro), nil
}
func (o *OortFS) Remove(ctx context.Context, parent []byte, name string) (int32, error) {
	v, err := o.validateIP(ctx)
	if err != nil {
		return 1, err
	}
	if !v {
		return 1, errors.New("Unknown or unauthorized FS use")
	}
	// Get the ID from the group list
	b, err := o.comms.ReadGroupItem(ctx, parent, []byte(name))
	if store.IsNotFound(err) {
		return 1, nil
	} else if err != nil {
		return 1, err
	}
	d := &pb.DirEntry{}
	err = proto.Unmarshal(b, d)
	if err != nil {
		return 1, err
	}
	// TODO: More error handling needed
	// TODO: Handle possible race conditions where user writes and deletes
	//       the same file over and over
	// Mark the item deleted in the group
	t := &pb.Tombstone{}
	tsm := brimtime.TimeToUnixMicro(time.Now())
	t.Dtime = tsm
	t.Qtime = tsm
	t.FsId = []byte("1") // TODO: Make sure this gets set when we are tracking fsids
	inode, err := o.GetInode(ctx, d.Id)
	if err != nil {
		return 1, err
	}
	t.Blocks = inode.Blocks
	t.Inode = inode.Inode
	d.Tombstone = t
	b, err = proto.Marshal(d)
	if err != nil {
		return 1, err
	}
	// NOTE: The tsm-1 is kind of a hack because the timestamp needs to be
	// updated on this write, but if we choose tsm, once the actual delete
	// comes through, it will not work because it is going to try to delete
	// with a timestamp of tsm.
	err = o.comms.WriteGroupTS(ctx, parent, []byte(name), b, tsm-1)
	if err != nil {
		return 1, err // Not really sure what should be done here to try to recover from err
	}
	o.deleteChan <- &DeleteItem{
		parent: parent,
		name:   name,
	}
	return 0, nil
}
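// A standalone sketch (illustrative only) of why Remove writes the
// tombstoned entry at tsm-1: a write at timestamp T shadows anything older,
// and the follow-up delete must carry a strictly newer timestamp to take
// effect. Writing the tombstone at tsm-1 leaves tsm free for the delete.
//
//	package main
//
//	import "fmt"
//
//	// supersedes mirrors the store's strict-newer rule for applying an
//	// incoming operation over an existing entry.
//	func supersedes(existingMicro, incomingMicro int64) bool {
//		return incomingMicro > existingMicro
//	}
//
//	func main() {
//		tsm := int64(1000000)
//		fmt.Println(supersedes(tsm, tsm))   // false: a delete at tsm would lose to a write at tsm
//		fmt.Println(supersedes(tsm-1, tsm)) // true: tombstone at tsm-1 lets the delete at tsm win
//	}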
// GrantAddrFS ...
func (s *FileSystemAPIServer) GrantAddrFS(ctx context.Context, r *pb.GrantAddrFSRequest) (*pb.GrantAddrFSResponse, error) {
	var err error
	var addrData AddrRef
	var addrByte []byte
	srcAddr := ""

	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		srcAddr = pr.Addr.String()
	}
	// validate token
	_, err = s.validateToken(r.Token)
	if err != nil {
		log.Printf("%s GRANT FAILED %s\n", srcAddr, "PermissionDenied")
		return nil, errf(codes.PermissionDenied, "%v", "Invalid Token")
	}
	// GRANT a file system entry for the addr
	//     write /fs/FSID/addr    addr    AddrRef
	pKey := fmt.Sprintf("/fs/%s/addr", r.FSid)
	pKeyA, pKeyB := murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB := murmur3.Sum128([]byte(r.Addr))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	addrData.Addr = r.Addr
	addrData.FSID = r.FSid
	addrByte, err = json.Marshal(addrData)
	if err != nil {
		log.Printf("%s GRANT FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}
	_, err = s.gstore.Write(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro, addrByte)
	if err != nil {
		log.Printf("%s GRANT FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}

	// Addr was granted; log the operation
	log.Printf("%s GRANT SUCCESS %s %s\n", srcAddr, r.FSid, r.Addr)
	return &pb.GrantAddrFSResponse{Data: r.FSid}, nil
}
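// A minimal sketch of the group-store addressing scheme GrantAddrFS and
// RevokeAddrFS share: the group (parent) key hashes the "/fs/<FSID>/addr"
// path and the member (child) key hashes the address itself. Assumes the
// murmur3 package used throughout this code (github.com/spaolacci/murmur3,
// whose Sum128 returns two uint64 halves); the fsid and addr values are
// placeholders.
//
//	package main
//
//	import (
//		"fmt"
//
//		"github.com/spaolacci/murmur3"
//	)
//
//	func main() {
//		fsid := "0a1b2c3d-0000-0000-0000-example00000"
//		addr := "10.0.0.5"
//		pKeyA, pKeyB := murmur3.Sum128([]byte(fmt.Sprintf("/fs/%s/addr", fsid)))
//		cKeyA, cKeyB := murmur3.Sum128([]byte(addr))
//		fmt.Printf("group: %x %x member: %x %x\n", pKeyA, pKeyB, cKeyA, cKeyB)
//	}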
// RevokeAddrFS ...
func (s *FileSystemAPIServer) RevokeAddrFS(ctx context.Context, r *fb.RevokeAddrFSRequest) (*fb.RevokeAddrFSResponse, error) {
	var status string
	var err error
	var acctData AcctPayLoad

	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		fmt.Println(pr.Addr)
	}
	// getAcct data
	acctData, err = s.getAcct("/acct", r.Acctnum)
	if err != nil {
		log.Printf("Error %v on lookup for account %s", err, r.Acctnum)
		return nil, errf(codes.NotFound, "%v", err)
	}
	// validate token
	if acctData.Token != r.Token {
		return nil, errf(codes.PermissionDenied, "%s", "Invalid Token")
	}
	parentKey := fmt.Sprintf("/fs/%s/addr", r.FSid)
	childKey := r.Addr
	parentKeyA, parentKeyB := murmur3.Sum128([]byte(parentKey))
	childKeyA, childKeyB := murmur3.Sum128([]byte(childKey))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	// Delete addr
	_, err = s.fsws.gstore.Delete(context.Background(), parentKeyA, parentKeyB, childKeyA, childKeyB, timestampMicro)
	if store.IsNotFound(err) {
		log.Printf("/fs/%s/addr/%s did not exist to delete", r.FSid, r.Addr)
		return nil, errf(codes.NotFound, "%s", "Addr not found")
	} else if err != nil {
		return nil, errf(codes.Internal, "%s", err)
	}
	// Report success
	status = fmt.Sprintf("addr %s for filesystem %s with account id %s was revoked", r.Addr, r.FSid, r.Acctnum)
	return &fb.RevokeAddrFSResponse{Status: status}, nil
}
func (store *defaultValueStore) outPullReplicationPass(notifyChan chan *bgNotification) *bgNotification {
	if store.msgRing == nil {
		return nil
	}
	ring := store.msgRing.Ring()
	if ring == nil || ring.ReplicaCount() < 2 || ring.NodeCount() < 2 {
		return nil
	}
	begin := time.Now()
	defer func() {
		elapsed := time.Since(begin)
		store.logDebug("outPullReplication: pass took %s", elapsed)
		atomic.StoreInt64(&store.outPullReplicationNanoseconds, elapsed.Nanoseconds())
	}()
	rightwardPartitionShift := 64 - ring.PartitionBitCount()
	partitionCount := uint64(1) << ring.PartitionBitCount()
	if store.pullReplicationState.outIteration == math.MaxUint16 {
		store.pullReplicationState.outIteration = 0
	} else {
		store.pullReplicationState.outIteration++
	}
	ringVersion := ring.Version()
	ws := store.pullReplicationState.outWorkers
	for uint64(len(store.pullReplicationState.outKTBFs)) < ws {
		store.pullReplicationState.outKTBFs = append(store.pullReplicationState.outKTBFs, newValueKTBloomFilter(store.pullReplicationState.outBloomN, store.pullReplicationState.outBloomP, 0))
	}
	var abort uint32
	f := func(p uint64, w uint64, ktbf *valueKTBloomFilter) {
		pb := p << rightwardPartitionShift
		rb := pb + ((uint64(1) << rightwardPartitionShift) / ws * w)
		var re uint64
		if w+1 == ws {
			if p+1 == partitionCount {
				re = math.MaxUint64
			} else {
				re = ((p + 1) << rightwardPartitionShift) - 1
			}
		} else {
			re = pb + ((uint64(1)<<rightwardPartitionShift)/ws*(w+1)) - 1
		}
		timestampbitsnow := uint64(brimtime.TimeToUnixMicro(time.Now())) << _TSB_UTIL_BITS
		cutoff := timestampbitsnow - store.replicationIgnoreRecent
		var more bool
		for atomic.LoadUint32(&abort) == 0 {
			rbThis := rb
			ktbf.reset(store.pullReplicationState.outIteration)
			rb, more = store.locmap.ScanCallback(rb, re, 0, _TSB_LOCAL_REMOVAL, cutoff, store.pullReplicationState.outBloomN, func(keyA uint64, keyB uint64, timestampbits uint64, length uint32) bool {
				ktbf.add(keyA, keyB, timestampbits)
				return true
			})
			ring2 := store.msgRing.Ring()
			if ring2 == nil || ring2.Version() != ringVersion {
				break
			}
			reThis := re
			if more {
				reThis = rb - 1
			}
			prm := store.newOutPullReplicationMsg(ringVersion, uint32(p), cutoff, rbThis, reThis, ktbf)
			atomic.AddInt32(&store.outPullReplications, 1)
			store.msgRing.MsgToOtherReplicas(prm, uint32(p), store.pullReplicationState.outMsgTimeout)
			if !more {
				break
			}
		}
	}
	wg := &sync.WaitGroup{}
	wg.Add(int(ws))
	for w := uint64(0); w < ws; w++ {
		go func(w uint64) {
			ktbf := store.pullReplicationState.outKTBFs[w]
			pb := partitionCount / ws * w
			for p := pb; p < partitionCount; p++ {
				if atomic.LoadUint32(&abort) != 0 {
					break
				}
				ring2 := store.msgRing.Ring()
				if ring2 == nil || ring2.Version() != ringVersion {
					break
				}
				if ring.Responsible(uint32(p)) {
					f(p, w, ktbf)
				}
			}
			for p := uint64(0); p < pb; p++ {
				if atomic.LoadUint32(&abort) != 0 {
					break
				}
				ring2 := store.msgRing.Ring()
				if ring2 == nil || ring2.Version() != ringVersion {
					break
				}
				if ring.Responsible(uint32(p)) {
					f(p, w, ktbf)
				}
			}
			wg.Done()
		}(w)
	}
	waitChan := make(chan struct{}, 1)
	go func() {
		wg.Wait()
		close(waitChan)
	}()
	select {
	case notification := <-notifyChan:
		atomic.AddUint32(&abort, 1)
		<-waitChan
		return notification
	case <-waitChan:
		return nil
	}
}
// inPullReplication actually processes incoming pull-replication messages;
// there may be more than one of these workers.
func (store *defaultValueStore) inPullReplication(wg *sync.WaitGroup) {
	k := make([]uint64, store.bulkSetState.msgCap/_VALUE_BULK_SET_MSG_MIN_ENTRY_LENGTH*2)
	v := make([]byte, store.valueCap)
	for {
		prm := <-store.pullReplicationState.inMsgChan
		if prm == nil {
			break
		}
		if store.msgRing == nil {
			store.pullReplicationState.inFreeMsgChan <- prm
			continue
		}
		ring := store.msgRing.Ring()
		if ring == nil {
			store.pullReplicationState.inFreeMsgChan <- prm
			continue
		}
		k = k[:0]
		// This is what the remote system used when making its bloom filter,
		// computed via its config.ReplicationIgnoreRecent setting. We want to
		// use the exact same cutoff in our checks and possible response.
		cutoff := prm.cutoff()
		tombstoneCutoff := (uint64(brimtime.TimeToUnixMicro(time.Now())) << _TSB_UTIL_BITS) - store.tombstoneDiscardState.age
		ktbf := prm.ktBloomFilter()
		l := int64(store.bulkSetState.msgCap)
		callback := func(keyA uint64, keyB uint64, timestampbits uint64, length uint32) bool {
			if timestampbits&_TSB_DELETION == 0 || timestampbits >= tombstoneCutoff {
				if !ktbf.mayHave(keyA, keyB, timestampbits) {
					k = append(k, keyA, keyB)
					l -= _VALUE_BULK_SET_MSG_ENTRY_HEADER_LENGTH + int64(length)
					if l <= 0 {
						return false
					}
				}
			}
			return true
		}
		// Based on the replica index for the local node, start the scan at
		// different points. For example, in a three replica system the first
		// replica would start scanning at the start, the second a third
		// through, the last would start two thirds through. This is so that
		// pull-replication messages, which are sent concurrently to all other
		// replicas, will get different responses back instead of duplicate
		// items if there is a lot of data to be sent.
		responsibleReplica := ring.ResponsibleReplica(uint32(prm.rangeStart() >> (64 - ring.PartitionBitCount())))
		if responsibleReplica < 0 {
			responsibleReplica = 0
		}
		scanStart := prm.rangeStart() + (prm.rangeStop()-prm.rangeStart())/uint64(ring.ReplicaCount())*uint64(responsibleReplica)
		scanStop := prm.rangeStop()
		store.locmap.ScanCallback(scanStart, scanStop, 0, _TSB_LOCAL_REMOVAL, cutoff, math.MaxUint64, callback)
		if scanStart != prm.rangeStart() && l > 0 {
			scanStop = scanStart - 1
			scanStart = prm.rangeStart()
			store.locmap.ScanCallback(scanStart, scanStop, 0, _TSB_LOCAL_REMOVAL, cutoff, math.MaxUint64, callback)
		}
		nodeID := prm.nodeID()
		store.pullReplicationState.inFreeMsgChan <- prm
		if len(k) > 0 {
			bsm := store.newOutBulkSetMsg()
			// Indicate that a response to this bulk-set message is not
			// necessary. If the message fails to reach its destination, that
			// destination will simply resend another pull replication message
			// on its next pass.
			binary.BigEndian.PutUint64(bsm.header, 0)
			var t uint64
			var err error
			for i := 0; i < len(k); i += 2 {
				t, v, err = store.read(k[i], k[i+1], v[:0])
				if IsNotFound(err) {
					if t == 0 {
						continue
					}
				} else if err != nil {
					continue
				}
				if t&_TSB_LOCAL_REMOVAL == 0 {
					if !bsm.add(k[i], k[i+1], t, v) {
						break
					}
					atomic.AddInt32(&store.outBulkSetValues, 1)
				}
			}
			if len(bsm.body) > 0 {
				atomic.AddInt32(&store.outBulkSets, 1)
				store.msgRing.MsgToNode(bsm, nodeID, store.pullReplicationState.inResponseMsgTimeout)
			}
		}
	}
	wg.Done()
}
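// A standalone sketch of the staggered scan-start computation described in
// the comment above: each replica begins scanning at a different offset
// within the requested range, so concurrent pull-replication responses from
// the replicas tend to cover different keys. Values are illustrative.
//
//	package main
//
//	import "fmt"
//
//	// scanStart mirrors the offset arithmetic used by inPullReplication.
//	func scanStart(rangeStart, rangeStop uint64, replicaCount, responsibleReplica int) uint64 {
//		return rangeStart + (rangeStop-rangeStart)/uint64(replicaCount)*uint64(responsibleReplica)
//	}
//
//	func main() {
//		start, stop := uint64(0), uint64(900)
//		for replica := 0; replica < 3; replica++ {
//			// replica 0 starts at 0, replica 1 a third through (300),
//			// replica 2 two thirds through (600)
//			fmt.Println(scanStart(start, stop, 3, replica))
//		}
//	}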
func (o *StoreComms) DeleteValue(ctx context.Context, id []byte) error {
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	return o.DeleteValueTS(ctx, id, timestampMicro)
}
func GSTests() {
	gsconfigs := make([]GroupClientConfig, *clients)
	var wg sync.WaitGroup
	for w := 0; w < *clients; w++ {
		gsconfigs[w].addr = *gsServer
		gsconfigs[w].id = w
		gsconfigs[w].count = perClient
		gsconfigs[w].value = &value
		gsconfigs[w].wg = &wg
		perGroup := perClient / *groups
		for g := 0; g < *groups; g++ {
			grpA, grpB := murmur3.Sum128([]byte(fmt.Sprintf("group%d-%d", gsconfigs[w].id, g)))
			for k := 0; k < perGroup; k++ {
				tsm := brimtime.TimeToUnixMicro(time.Now())
				wr := &gp.WriteRequest{
					KeyA:           grpA,
					KeyB:           grpB,
					TimestampMicro: tsm,
				}
				wr.ChildKeyA, wr.ChildKeyB = murmur3.Sum128([]byte(fmt.Sprintf("somethingtestkey%d-%d", gsconfigs[w].id, k)))
				rr := &gp.ReadRequest{
					KeyA:      grpA,
					KeyB:      grpB,
					ChildKeyA: wr.ChildKeyA,
					ChildKeyB: wr.ChildKeyB,
				}
				gsconfigs[w].wm = append(gsconfigs[w].wm, wr)
				gsconfigs[w].rm = append(gsconfigs[w].rm, rr)
			}
		}
	}
	log.Println("GroupStore Key/hash generation complete. Spawning tests.")
	if *gsWriteTest {
		t := time.Now()
		for w := 0; w < *clients; w++ {
			wg.Add(1)
			if *streamTest {
				go GroupStreamWrite(&gsconfigs[w])
			} else {
				go GroupWrite(&gsconfigs[w])
			}
		}
		wg.Wait()
		log.Println("Issued", *clients*perClient, "GS WRITES")
		ts := time.Since(t).Seconds()
		log.Println("Total run time was:", ts, "seconds")
		log.Printf("Per second: %.2f\n", float64(*clients*perClient)/ts)
	}
	if *gsReadTest {
		t := time.Now()
		for w := 0; w < *clients; w++ {
			wg.Add(1)
			if *streamTest {
				go GroupStreamRead(&gsconfigs[w])
			} else {
				go GroupRead(&gsconfigs[w])
			}
		}
		wg.Wait()
		log.Println("Issued", *clients*perClient, "GS READS")
		ts := time.Since(t).Seconds()
		log.Println("Total run time was:", ts, "seconds")
		log.Printf("Per second: %.2f\n", float64(*clients*perClient)/ts)
	}
}
func (o *StoreComms) DeleteGroupItem(ctx context.Context, key, childKey []byte) error {
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	return o.DeleteGroupItemTS(ctx, key, childKey, timestampMicro)
}
func (o *StoreComms) WriteGroup(ctx context.Context, key, childKey, value []byte) error {
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	return o.WriteGroupTS(ctx, key, childKey, value, timestampMicro)
}
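// A minimal sketch of the convenience-wrapper pattern used by DeleteValue,
// DeleteGroupItem, and WriteGroup above: the plain call stamps the current
// time in microseconds and delegates to the *TS variant, so callers that
// need explicit ordering (like Remove's tsm-1 tombstone) can supply their
// own timestamp. The tsWriter interface and printWriter type here are
// hypothetical, purely for illustration.
//
//	package main
//
//	import (
//		"fmt"
//		"time"
//	)
//
//	type tsWriter interface {
//		WriteTS(key []byte, value []byte, timestampMicro int64) error
//	}
//
//	// write is the non-TS convenience form: stamp now, then delegate.
//	func write(w tsWriter, key, value []byte) error {
//		return w.WriteTS(key, value, time.Now().UnixNano()/int64(time.Microsecond))
//	}
//
//	type printWriter struct{}
//
//	func (printWriter) WriteTS(key, value []byte, ts int64) error {
//		fmt.Printf("write %q=%q at %d\n", key, value, ts)
//		return nil
//	}
//
//	func main() {
//		_ = write(printWriter{}, []byte("k"), []byte("v"))
//	}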
// tombstoneDiscardPassExpiredDeletions scans for entries marked with
// _TSB_DELETION (but not _TSB_LOCAL_REMOVAL) that are older than the maximum
// tombstone age and marks them for _TSB_LOCAL_REMOVAL.
func (store *defaultGroupStore) tombstoneDiscardPassExpiredDeletions(notifyChan chan *bgNotification) *bgNotification {
	// Each worker will perform a pass on a subsection of each partition's key
	// space. Additionally, each worker will start their work on a different
	// partition. This reduces contention for a given section of the locmap.
	partitionShift := uint16(0)
	partitionMax := uint64(0)
	if store.msgRing != nil {
		pbc := store.msgRing.Ring().PartitionBitCount()
		partitionShift = 64 - pbc
		partitionMax = (uint64(1) << pbc) - 1
	}
	workerMax := uint64(store.workers - 1)
	workerPartitionPiece := (uint64(1) << partitionShift) / (workerMax + 1)
	work := func(partition uint64, worker uint64, localRemovals []groupLocalRemovalEntry) {
		partitionOnLeftBits := partition << partitionShift
		rangeBegin := partitionOnLeftBits + (workerPartitionPiece * worker)
		var rangeEnd uint64
		// A little bit of complexity here to handle where the more general
		// expressions would have overflow issues.
		if worker != workerMax {
			rangeEnd = partitionOnLeftBits + (workerPartitionPiece * (worker + 1)) - 1
		} else {
			if partition != partitionMax {
				rangeEnd = ((partition + 1) << partitionShift) - 1
			} else {
				rangeEnd = math.MaxUint64
			}
		}
		cutoff := (uint64(brimtime.TimeToUnixMicro(time.Now())) << _TSB_UTIL_BITS) - store.tombstoneDiscardState.age
		more := true
		for more {
			localRemovalsIndex := 0
			// Since we shouldn't try to modify what we're scanning while we're
			// scanning (lock contention) we instead record in localRemovals
			// what to modify after the scan.
			rangeBegin, more = store.locmap.ScanCallback(rangeBegin, rangeEnd, _TSB_DELETION, _TSB_LOCAL_REMOVAL, cutoff, uint64(store.tombstoneDiscardState.batchSize), func(keyA uint64, keyB uint64, childKeyA uint64, childKeyB uint64, timestampbits uint64, length uint32) bool {
				e := &localRemovals[localRemovalsIndex]
				e.keyA = keyA
				e.keyB = keyB
				e.childKeyA = childKeyA
				e.childKeyB = childKeyB
				e.timestampbits = timestampbits
				localRemovalsIndex++
				return true
			})
			atomic.AddInt32(&store.expiredDeletions, int32(localRemovalsIndex))
			for i := 0; i < localRemovalsIndex; i++ {
				e := &localRemovals[i]
				// These writes go through the entire system, so they're
				// persisted and therefore restored on restarts.
				store.write(e.keyA, e.keyB, e.childKeyA, e.childKeyB, e.timestampbits|_TSB_LOCAL_REMOVAL, nil, true)
			}
		}
	}
	// To avoid memory churn, the localRemovals scratchpads are allocated just
	// once and passed in to the workers.
	for len(store.tombstoneDiscardState.localRemovals) <= int(workerMax) {
		store.tombstoneDiscardState.localRemovals = append(store.tombstoneDiscardState.localRemovals, make([]groupLocalRemovalEntry, store.tombstoneDiscardState.batchSize))
	}
	var abort uint32
	wg := &sync.WaitGroup{}
	wg.Add(int(workerMax + 1))
	for worker := uint64(0); worker <= workerMax; worker++ {
		go func(worker uint64) {
			localRemovals := store.tombstoneDiscardState.localRemovals[worker]
			partitionBegin := (partitionMax + 1) / (workerMax + 1) * worker
			for partition := partitionBegin; ; {
				if atomic.LoadUint32(&abort) != 0 {
					break
				}
				work(partition, worker, localRemovals)
				partition++
				if partition > partitionMax {
					partition = 0
				}
				if partition == partitionBegin {
					break
				}
			}
			wg.Done()
		}(worker)
	}
	waitChan := make(chan struct{}, 1)
	go func() {
		wg.Wait()
		close(waitChan)
	}()
	select {
	case notification := <-notifyChan:
		atomic.AddUint32(&abort, 1)
		<-waitChan
		return notification
	case <-waitChan:
		return nil
	}
}
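// A standalone sketch of the per-worker range split used above, including
// the edge cases the "overflow issues" comment calls out: the last worker of
// a partition runs to the end of that partition, and the last worker of the
// last partition runs to math.MaxUint64 rather than overflowing the general
// expression. The shift and counts below are illustrative.
//
//	package main
//
//	import (
//		"fmt"
//		"math"
//	)
//
//	func workerRange(partition, worker, workerMax, partitionMax uint64, partitionShift uint16) (uint64, uint64) {
//		piece := (uint64(1) << partitionShift) / (workerMax + 1)
//		begin := partition<<partitionShift + piece*worker
//		var end uint64
//		switch {
//		case worker != workerMax:
//			end = partition<<partitionShift + piece*(worker+1) - 1
//		case partition != partitionMax:
//			end = (partition+1)<<partitionShift - 1
//		default:
//			end = math.MaxUint64 // last worker of last partition
//		}
//		return begin, end
//	}
//
//	func main() {
//		// 2 partition bits -> 4 partitions (shift 62), 2 workers; show the
//		// ranges for the final partition.
//		for w := uint64(0); w <= 1; w++ {
//			b, e := workerRange(3, w, 1, 3, 62)
//			fmt.Printf("worker %d: [%x, %x]\n", w, b, e)
//		}
//	}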
func (c *Client) parseGroupCmd(line string) (string, error) {
	if c.gstore == nil {
		err := c.getGroupClient()
		if err != nil {
			return "", err
		}
	}
	split := strings.SplitN(line, " ", 2)
	cmd := split[0]
	if len(split) != 2 {
		if cmd == "exit" {
			return "", fmt.Errorf("Exiting..")
		}
		return c.printHelp(), nil
	}
	args := split[1]
	switch cmd {
	case "write":
		sarg := strings.SplitN(args, " ", 3)
		if len(sarg) < 3 {
			return "write needs groupkey, key, value: `write groupkey somekey some value thing here`", nil
		}
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, childKeyB := murmur3.Sum128([]byte(sarg[1]))
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.gstore.Write(context.Background(), keyA, keyB, childKeyA, childKeyB, timestampMicro, []byte(sarg[2]))
		if err != nil {
			return "", err
		}
		return fmt.Sprintf("WRITE TIMESTAMPMICRO: %d\nPREVIOUS TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "write-hash":
		sarg := strings.SplitN(args, " ", 4)
		if len(sarg) < 4 {
			return "write-hash needs groupkey, keyahash, keybhash, value: `write-hash groupkey 19191919 19191919 some value thing here`", nil
		}
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, err := strconv.ParseUint(sarg[1], 10, 64)
		if err != nil {
			return "", err
		}
		childKeyB, err := strconv.ParseUint(sarg[2], 10, 64)
		if err != nil {
			return "", err
		}
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.gstore.Write(context.Background(), keyA, keyB, childKeyA, childKeyB, timestampMicro, []byte(sarg[3]))
		if err != nil {
			return "", err
		}
		return fmt.Sprintf("WRITE TIMESTAMPMICRO: %d\nPREVIOUS TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "read":
		sarg := strings.SplitN(args, " ", 2)
		if len(sarg) < 2 {
			return "read needs groupkey, subkey", nil
		}
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, childKeyB := murmur3.Sum128([]byte(sarg[1]))
		timestampMicro, value, err := c.gstore.Read(context.Background(), keyA, keyB, childKeyA, childKeyB, nil)
		if store.IsNotFound(err) {
			return "not found", nil
		} else if err != nil {
			return "", err
		}
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nVALUE: %s", timestampMicro, value), nil
	case "read-hash":
		sarg := strings.SplitN(args, " ", 3)
		if len(sarg) < 3 {
			return "read-hash needs groupkey, subkeyA, subkeyB", nil
		}
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, err := strconv.ParseUint(sarg[1], 10, 64)
		if err != nil {
			return "", err
		}
		childKeyB, err := strconv.ParseUint(sarg[2], 10, 64)
		if err != nil {
			return "", err
		}
		timestampMicro, value, err := c.gstore.Read(context.Background(), keyA, keyB, childKeyA, childKeyB, nil)
		if store.IsNotFound(err) {
			return "not found", nil
		} else if err != nil {
			return "", err
		}
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nVALUE: %s", timestampMicro, value), nil
	case "read-group":
		keyA, keyB := murmur3.Sum128([]byte(args))
		items, err := c.gstore.ReadGroup(context.Background(), keyA, keyB)
		if store.IsNotFound(err) {
			return "not found", nil
		} else if err != nil {
			return "", err
		}
		keys := make([]string, len(items))
		for k, v := range items {
			keys[k] = fmt.Sprintf("TIMESTAMPMICRO: %d [ %d | %d ] VALUE: %s", v.TimestampMicro, v.ChildKeyA, v.ChildKeyB, v.Value)
		}
		return strings.Join(keys, "\n"), nil
	case "delete":
		sarg := strings.SplitN(args, " ", 2)
		if len(sarg) < 2 {
			return "delete needs groupkey, subkey", nil
		}
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, childKeyB := murmur3.Sum128([]byte(sarg[1]))
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.gstore.Delete(context.Background(), keyA, keyB, childKeyA, childKeyB, timestampMicro)
		if err != nil {
			return "", err
		}
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nOLD TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "lookup":
		sarg := strings.SplitN(args, " ", 2)
		if len(sarg) < 2 {
			return "lookup needs groupkey, subkey", nil
		}
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		childKeyA, childKeyB := murmur3.Sum128([]byte(sarg[1]))
		timestampMicro, length, err := c.gstore.Lookup(context.Background(), keyA, keyB, childKeyA, childKeyB)
		if store.IsNotFound(err) {
			return "not found", nil
		} else if err != nil {
			return "", err
		}
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nLENGTH: %d", timestampMicro, length), nil
	case "lookup-group":
		keyA, keyB := murmur3.Sum128([]byte(args))
		items, err := c.gstore.LookupGroup(context.Background(), keyA, keyB)
		if store.IsNotFound(err) {
			return "not found", nil
		} else if err != nil {
			return "", err
		}
		keys := make([]string, len(items))
		for k, v := range items {
			keys[k] = fmt.Sprintf("TIMESTAMPMICRO: %d [ %d | %d ]", v.TimestampMicro, v.ChildKeyA, v.ChildKeyB)
		}
		return strings.Join(keys, "\n"), nil
	case "mode":
		if args == "value" {
			c.gmode = false
			return "Switched to value mode", nil
		}
		if args == "group" {
			return "Already in group store mode", nil
		}
		return "Valid modes are: value | group", nil
	case "exit":
		log.Println("exit")
		return "", fmt.Errorf("Exiting..")
	}
	return c.printHelp(), nil
}
// CreateFS ...
func (s *FileSystemAPIServer) CreateFS(ctx context.Context, r *fb.CreateFSRequest) (*fb.CreateFSResponse, error) {
	var status string
	var result string
	var acctData AcctPayLoad
	var newFS FileSysPayLoad
	var acctRef AcctRefPayload
	var acctRefB []byte
	var err error
	var dataB []byte

	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		fmt.Println(pr.Addr)
	}
	// getAcct data
	acctData, err = s.getAcct("/acct", r.Acctnum)
	if err != nil {
		log.Printf("Error %v on lookup for account %s", err, r.Acctnum)
		return nil, err
	}
	// validate token
	if acctData.Token != r.Token {
		return nil, errf(codes.PermissionDenied, "%s", "Invalid Token")
	}
	// Check to see if the file system name already exists
	fs := fmt.Sprintf("/acct/%s/fs", acctData.ID)
	err = s.dupNameCheck(fs, r.FSName)
	if err != nil {
		log.Printf("Precondition Failed: %v\n", err)
		return nil, errf(codes.FailedPrecondition, "%v", err)
	}
	// File system values
	parentKey := fmt.Sprintf("/acct/%s/fs", r.Acctnum)
	childKey := uuid.NewV4().String()
	newFS.ID = childKey
	newFS.AcctID = r.Acctnum
	newFS.Name = r.FSName
	newFS.SizeInBytes = 107374182400
	newFS.Status = "active"
	newFS.CreateDate = time.Now().Unix()
	newFS.DeleteDate = 0
	// write file system
	dataB, err = json.Marshal(newFS)
	if err != nil {
		log.Printf("Marshal Error: %v\n", err)
		return nil, errf(codes.Internal, "%v", err)
	}
	_, err = s.fsws.writeGStore(parentKey, childKey, dataB)
	if err != nil {
		log.Printf("Write Error: %v", err)
		return nil, errf(codes.Internal, "%v", err)
	}
	// Write special filesystem lookup entry:
	//   "/fs"  "[file system uuid]"  {"id": "[filesystem uuid]", "acctid": "[account uuid]"}
	parentKeyA, parentKeyB := murmur3.Sum128([]byte("/fs"))
	childKeyA, childKeyB := murmur3.Sum128([]byte(newFS.ID))
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	acctRef.ID = newFS.ID
	acctRef.AcctID = r.Acctnum
	acctRefB, err = json.Marshal(acctRef)
	if err != nil {
		log.Printf("Marshal Error: %v\n", err)
		return nil, errf(codes.Internal, "%v", err)
	}
	_, err = s.fsws.gstore.Write(context.Background(), parentKeyA, parentKeyB, childKeyA, childKeyB, timestampMicro, acctRefB)
	if err != nil {
		log.Printf("Write Error: %v", err)
		return nil, errf(codes.Internal, "%v", err)
	}
	// Prep results to return
	status = "OK"
	result = fmt.Sprintf("File System %s was created from Account %s", childKey, r.Acctnum)
	return &fb.CreateFSResponse{Payload: result, Status: status}, nil
}
func (c *Client) parseValueCmd(line string) (string, error) {
	if c.vconn == nil {
		err := c.getValueClient()
		if err != nil {
			return "", err
		}
	}
	split := strings.SplitN(line, " ", 2)
	cmd := split[0]
	if len(split) != 2 {
		if cmd == "exit" {
			return "", fmt.Errorf("Exiting..")
		}
		return c.printHelp(), nil
	}
	args := split[1]
	switch cmd {
	case "write":
		sarg := strings.SplitN(args, " ", 2)
		if len(sarg) < 2 {
			return "write needs key and value: `write somekey some value thing here`", nil
		}
		keyA, keyB := murmur3.Sum128([]byte(sarg[0]))
		value := []byte(sarg[1])
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.vstore.Write(context.Background(), keyA, keyB, timestampMicro, value)
		if err != nil {
			return "", err
		}
		return fmt.Sprintf("WRITE TIMESTAMPMICRO: %d\nPREVIOUS TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "read":
		keyA, keyB := murmur3.Sum128([]byte(args))
		timestampMicro, value, err := c.vstore.Read(context.Background(), keyA, keyB, nil)
		if store.IsNotFound(err) {
			return "not found", nil
		} else if err != nil {
			return "", err
		}
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nVALUE: %s", timestampMicro, value), nil
	case "delete":
		keyA, keyB := murmur3.Sum128([]byte(args))
		timestampMicro := brimtime.TimeToUnixMicro(time.Now())
		oldTimestampMicro, err := c.vstore.Delete(context.Background(), keyA, keyB, timestampMicro)
		if err != nil {
			return "", err
		}
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nOLD TIMESTAMPMICRO: %d", timestampMicro, oldTimestampMicro), nil
	case "lookup":
		keyA, keyB := murmur3.Sum128([]byte(args))
		timestampMicro, length, err := c.vstore.Lookup(context.Background(), keyA, keyB)
		if store.IsNotFound(err) {
			return "not found", nil
		} else if err != nil {
			return "", err
		}
		return fmt.Sprintf("TIMESTAMPMICRO: %d\nLENGTH: %d", timestampMicro, length), nil
	case "mode":
		if args == "value" {
			return "Already in value store mode", nil
		}
		if args == "group" {
			c.gmode = true
			return "Switched to group mode", nil
		}
		return "Valid modes are: value | group", nil
	case "exit":
		log.Println("exit")
		return "", fmt.Errorf("Exiting..")
	}
	return c.printHelp(), nil
}
// CreateFS ...
func (s *FileSystemAPIServer) CreateFS(ctx context.Context, r *pb.CreateFSRequest) (*pb.CreateFSResponse, error) {
	var err error
	var acctID string
	srcAddr := ""
	var fsRef FileSysRef
	var fsRefByte []byte
	var fsSysAttr FileSysAttr
	var fsSysAttrByte []byte

	// Get incoming ip
	pr, ok := peer.FromContext(ctx)
	if ok {
		srcAddr = pr.Addr.String()
	}
	// Validate Token
	acctID, err = s.validateToken(r.Token)
	if err != nil {
		log.Printf("%s CREATE FAILED %s\n", srcAddr, "PermissionDenied")
		return nil, errf(codes.PermissionDenied, "%v", "Invalid Token")
	}
	fsID := uuid.NewV4().String()
	timestampMicro := brimtime.TimeToUnixMicro(time.Now())
	// Write file system reference entries.
	//     write /fs            FSID    FileSysRef
	pKey := "/fs"
	pKeyA, pKeyB := murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB := murmur3.Sum128([]byte(fsID))
	fsRef.AcctID = acctID
	fsRef.FSID = fsID
	fsRefByte, err = json.Marshal(fsRef)
	if err != nil {
		log.Printf("%s CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}
	_, err = s.gstore.Write(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro, fsRefByte)
	if err != nil {
		log.Printf("%s CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}
	//     write /acct/acctID   FSID    FileSysRef
	pKey = fmt.Sprintf("/acct/%s", acctID)
	pKeyA, pKeyB = murmur3.Sum128([]byte(pKey))
	_, err = s.gstore.Write(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro, fsRefByte)
	if err != nil {
		log.Printf("%s CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}
	// Write file system attributes
	//     write /fs/FSID       name    FileSysAttr
	pKey = fmt.Sprintf("/fs/%s", fsID)
	pKeyA, pKeyB = murmur3.Sum128([]byte(pKey))
	cKeyA, cKeyB = murmur3.Sum128([]byte("name"))
	fsSysAttr.Attr = "name"
	fsSysAttr.Value = r.FSName
	fsSysAttr.FSID = fsID
	fsSysAttrByte, err = json.Marshal(fsSysAttr)
	if err != nil {
		log.Printf("%s CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}
	_, err = s.gstore.Write(context.Background(), pKeyA, pKeyB, cKeyA, cKeyB, timestampMicro, fsSysAttrByte)
	if err != nil {
		log.Printf("%s CREATE FAILED %v\n", srcAddr, err)
		return nil, errf(codes.Internal, "%v", err)
	}

	// Return the file system UUID and log the operation
	log.Printf("%s CREATE SUCCESS %s\n", srcAddr, fsID)
	return &pb.CreateFSResponse{Data: fsID}, nil
}
func (store *defaultGroupStore) pushReplicationPass(notifyChan chan *bgNotification) *bgNotification {
	if store.msgRing == nil {
		return nil
	}
	begin := time.Now()
	defer func() {
		store.logDebug("pushReplication: pass took %s", time.Since(begin))
	}()
	ring := store.msgRing.Ring()
	if ring == nil {
		return nil
	}
	ringVersion := ring.Version()
	pbc := ring.PartitionBitCount()
	partitionShift := uint64(64 - pbc)
	partitionMax := (uint64(1) << pbc) - 1
	workerMax := uint64(store.pushReplicationState.workers - 1)
	workerPartitionPiece := (uint64(1) << partitionShift) / (workerMax + 1)
	// To avoid memory churn, the scratchpad areas are allocated just once and
	// passed in to the workers.
	for len(store.pushReplicationState.lists) < int(workerMax+1) {
		store.pushReplicationState.lists = append(store.pushReplicationState.lists, make([]uint64, store.bulkSetState.msgCap/_GROUP_BULK_SET_MSG_MIN_ENTRY_LENGTH*4))
	}
	for len(store.pushReplicationState.valBufs) < int(workerMax+1) {
		store.pushReplicationState.valBufs = append(store.pushReplicationState.valBufs, make([]byte, store.valueCap))
	}
	var abort uint32
	work := func(partition uint64, worker uint64, list []uint64, valbuf []byte) {
		partitionOnLeftBits := partition << partitionShift
		rangeBegin := partitionOnLeftBits + (workerPartitionPiece * worker)
		var rangeEnd uint64
		// A little bit of complexity here to handle where the more general
		// expressions would have overflow issues.
		if worker != workerMax {
			rangeEnd = partitionOnLeftBits + (workerPartitionPiece * (worker + 1)) - 1
		} else {
			if partition != partitionMax {
				rangeEnd = ((partition + 1) << partitionShift) - 1
			} else {
				rangeEnd = math.MaxUint64
			}
		}
		timestampbitsNow := uint64(brimtime.TimeToUnixMicro(time.Now())) << _TSB_UTIL_BITS
		cutoff := timestampbitsNow - store.replicationIgnoreRecent
		tombstoneCutoff := timestampbitsNow - store.tombstoneDiscardState.age
		availableBytes := int64(store.bulkSetState.msgCap)
		list = list[:0]
		// We ignore the "more" option from ScanCallback and just send the
		// first matching batch each full iteration. Once a remote end acks the
		// batch, those keys will have been removed and the first matching
		// batch will start with any remaining keys.
		// First we gather the matching keys to send.
		store.locmap.ScanCallback(rangeBegin, rangeEnd, 0, _TSB_LOCAL_REMOVAL, cutoff, math.MaxUint64, func(keyA uint64, keyB uint64, childKeyA uint64, childKeyB uint64, timestampbits uint64, length uint32) bool {
			inMsgLength := _GROUP_BULK_SET_MSG_ENTRY_HEADER_LENGTH + int64(length)
			if timestampbits&_TSB_DELETION == 0 || timestampbits >= tombstoneCutoff {
				list = append(list, keyA, keyB, childKeyA, childKeyB)
				availableBytes -= inMsgLength
				if availableBytes < inMsgLength {
					return false
				}
			}
			return true
		})
		if len(list) <= 0 || atomic.LoadUint32(&abort) != 0 {
			return
		}
		ring2 := store.msgRing.Ring()
		if ring2 == nil || ring2.Version() != ringVersion {
			return
		}
		// Then we build and send the actual message.
		bsm := store.newOutBulkSetMsg()
		var timestampbits uint64
		var err error
		for i := 0; i < len(list); i += 4 {
			timestampbits, valbuf, err = store.read(list[i], list[i+1], list[i+2], list[i+3], valbuf[:0])
			// This might mean we need to send a deletion or it might mean the
			// key has been completely removed from our records
			// (timestampbits==0).
			if IsNotFound(err) {
				if timestampbits == 0 {
					continue
				}
			} else if err != nil {
				continue
			}
			if timestampbits&_TSB_LOCAL_REMOVAL == 0 && timestampbits < cutoff && (timestampbits&_TSB_DELETION == 0 || timestampbits >= tombstoneCutoff) {
				if !bsm.add(list[i], list[i+1], list[i+2], list[i+3], timestampbits, valbuf) {
					break
				}
				atomic.AddInt32(&store.outBulkSetPushValues, 1)
			}
		}
		atomic.AddInt32(&store.outBulkSetPushes, 1)
		store.msgRing.MsgToOtherReplicas(bsm, uint32(partition), store.pushReplicationState.msgTimeout)
	}
	wg := &sync.WaitGroup{}
	wg.Add(int(workerMax + 1))
	for worker := uint64(0); worker <= workerMax; worker++ {
		go func(worker uint64) {
			list := store.pushReplicationState.lists[worker]
			valbuf := store.pushReplicationState.valBufs[worker]
			partitionBegin := (partitionMax + 1) / (workerMax + 1) * worker
			for partition := partitionBegin; ; {
				if atomic.LoadUint32(&abort) != 0 {
					break
				}
				ring2 := store.msgRing.Ring()
				if ring2 == nil || ring2.Version() != ringVersion {
					break
				}
				if !ring.Responsible(uint32(partition)) {
					work(partition, worker, list, valbuf)
				}
				partition++
				if partition > partitionMax {
					partition = 0
				}
				if partition == partitionBegin {
					break
				}
			}
			wg.Done()
		}(worker)
	}
	waitChan := make(chan struct{}, 1)
	go func() {
		wg.Wait()
		close(waitChan)
	}()
	select {
	case notification := <-notifyChan:
		atomic.AddUint32(&abort, 1)
		<-waitChan
		return notification
	case <-waitChan:
		return nil
	}
}
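// A standalone sketch of the timestamp-bits arithmetic used in the
// replication passes above: the current time in microseconds is shifted
// left by _TSB_UTIL_BITS to leave room for the state flag bits, and cutoffs
// are formed by subtracting an age expressed in the same shifted units. The
// constant below is a placeholder, not the store's real _TSB_UTIL_BITS
// value.
//
//	package main
//
//	import (
//		"fmt"
//		"time"
//	)
//
//	const tsbUtilBits = 8 // placeholder for _TSB_UTIL_BITS
//
//	func main() {
//		nowMicro := uint64(time.Now().UnixNano() / 1000)
//		timestampbitsNow := nowMicro << tsbUtilBits
//		// e.g. a 60-second replicationIgnoreRecent window, pre-shifted
//		ignoreRecent := uint64(60*time.Second/time.Microsecond) << tsbUtilBits
//		cutoff := timestampbitsNow - ignoreRecent
//		fmt.Printf("now=%d cutoff=%d (entries newer than cutoff are skipped)\n",
//			timestampbitsNow, cutoff)
//	}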