func main() {
	rand.Seed(time.Now().UnixNano())
	x.Init()
	checkFlagsAndInitDirs()

	ps, err := store.NewStore(*postingDir)
	x.Checkf(err, "Error initializing postings store")
	defer ps.Close()

	if len(*schemaFile) > 0 {
		err = schema.Parse(*schemaFile)
		x.Checkf(err, "Error while loading schema: %s", *schemaFile)
	}
	// Posting will initialize index which requires schema. Hence, initialize
	// schema before calling posting.Init().
	posting.Init(ps)
	worker.Init(ps)
	x.Check(group.ParseGroupConfig(*conf))

	// Setup external communication.
	che := make(chan error, 1)
	go setupServer(che)
	go worker.StartRaftNodes(*walDir)

	if err := <-che; !strings.Contains(err.Error(), "use of closed network connection") {
		log.Fatal(err)
	}
}
func (p *poolsi) connect(addr string) {
	if addr == *myAddr {
		return
	}
	p.RLock()
	_, has := p.all[addr]
	p.RUnlock()
	if has {
		return
	}

	pool := newPool(addr, 5)
	query := new(Payload)
	query.Data = make([]byte, 10)
	x.Check2(rand.Read(query.Data))

	conn, err := pool.Get()
	x.Checkf(err, "Unable to connect")

	c := NewWorkerClient(conn)
	resp, err := c.Echo(context.Background(), query)
	x.Checkf(err, "Unable to Echo")
	x.AssertTrue(bytes.Equal(resp.Data, query.Data))
	x.Check(pool.Put(conn))
	fmt.Printf("Connection with %q successful.\n", addr)

	p.Lock()
	defer p.Unlock()
	_, has = p.all[addr]
	if has {
		return
	}
	p.all[addr] = pool
}
func (n *node) snapshotPeriodically() {
	ticker := time.NewTicker(10 * time.Minute)
	for {
		select {
		case <-ticker.C:
			le, err := n.store.LastIndex()
			x.Checkf(err, "Unable to retrieve last index")

			existing, err := n.store.Snapshot()
			x.Checkf(err, "Unable to get existing snapshot")

			si := existing.Metadata.Index
			if le <= si {
				continue
			}

			msg := fmt.Sprintf("Snapshot from %v", strconv.FormatUint(n.id, 10))
			_, err = n.store.CreateSnapshot(le, nil, []byte(msg))
			x.Checkf(err, "While creating snapshot")
			x.Checkf(n.store.Compact(le), "While compacting snapshot")

		case <-n.done:
			return
		}
	}
}
func main() {
	flag.Parse()
	logrus.SetLevel(logrus.DebugLevel)
	var srcl, dstl []R
	f, bufReader := getReader(*src)
	var err error

	srcCount := 0
	var strBuf bytes.Buffer
	for {
		err = x.ReadLine(bufReader, &strBuf)
		if err != nil {
			break
		}
		srcCount++
		rnq, err := rdf.Parse(strBuf.String())
		x.Checkf(err, "Unable to parse line: [%v]", strBuf.String())
		srcl = append(srcl, convert(rnq))
	}
	if err != nil && err != io.EOF {
		err := x.Errorf("Error while reading file: %v", err)
		log.Fatalf("%+v", err)
	}
	x.Check(f.Close())
	fmt.Println("Source done")

	f, bufReader = getReader(*dst)
	dstCount := 0
	for {
		err = x.ReadLine(bufReader, &strBuf)
		if err != nil {
			break
		}
		dstCount++
		rnq, err := rdf.Parse(strBuf.String())
		x.Checkf(err, "Unable to parse line: [%v]", strBuf.String())
		dstl = append(dstl, convert(rnq))
	}
	if err != nil && err != io.EOF {
		err := x.Errorf("Error while reading file: %v", err)
		log.Fatalf("%+v", err)
	}
	x.Check(f.Close())

	fmt.Printf("Src: [%d] Dst: [%d]\n", srcCount, dstCount)
	sort.Sort(ByR(srcl))
	sort.Sort(ByR(dstl))
	fmt.Println("Comparing now")
	//for i := 0; i < 100; i++ {
	//	fmt.Printf("[S,D] %v %v\n", srcl[i], dstl[i])
	//}
	compare(srcl, dstl)
}
// getPostingList tries to get posting list from l.pbuffer. If it is nil, then
// we query RocksDB. There is no need for lock acquisition here.
func (l *List) getPostingList(loop int) *types.PostingList {
	if loop >= 10 {
		x.Fatalf("This is over the 10th loop: %v", loop)
	}
	l.AssertRLock()
	// Wait for any previous commits to happen before retrieving posting list again.
	l.Wait()

	pb := atomic.LoadPointer(&l.pbuffer)
	plist := (*types.PostingList)(pb)

	if plist == nil {
		x.AssertTrue(l.pstore != nil)
		plist = new(types.PostingList)

		if slice, err := l.pstore.Get(l.key); err == nil && slice != nil {
			x.Checkf(plist.Unmarshal(slice.Data()), "Unable to Unmarshal PostingList from store")
			slice.Free()
		}
		if atomic.CompareAndSwapPointer(&l.pbuffer, pb, unsafe.Pointer(plist)) {
			return plist
		}
		// Someone else replaced the pointer in the meantime. Retry recursively.
		return l.getPostingList(loop + 1)
	}
	return plist
}
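// The following is a minimal, self-contained sketch (not Dgraph code; every name
// below is made up for illustration) of the caching pattern getPostingList uses:
// atomically load a cached pointer, build the value only if it is missing, and
// retry when another goroutine wins the CompareAndSwap race.
package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type cache struct {
	ptr unsafe.Pointer // holds a *string, accessed only through the atomic package
}

func (c *cache) get(build func() string) *string {
	for loop := 0; loop < 10; loop++ { // bounded retries, like the loop guard above
		p := atomic.LoadPointer(&c.ptr)
		if p != nil {
			return (*string)(p)
		}
		v := build()
		if atomic.CompareAndSwapPointer(&c.ptr, p, unsafe.Pointer(&v)) {
			return &v
		}
		// Someone else installed a value between the Load and the CAS; retry.
	}
	panic("too many retries")
}

func main() {
	c := &cache{}
	fmt.Println(*c.get(func() string { return "built once" }))
}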
func convert(n rdf.NQuad) R {
	r := R{}
	var err error
	r.os = n.Subject
	r.s, err = rdf.GetUid(n.Subject)
	x.Checkf(err, "Subject: %v", n.Subject)
	r.p = n.Predicate
	if len(n.ObjectId) > 0 {
		r.o, err = rdf.GetUid(n.ObjectId)
		x.Checkf(err, "Object: %v", n.ObjectId)
		r.oo = n.ObjectId
	}
	r.v = string(n.ObjectValue)
	return r
}
func makeRequests(mutation chan string, wg *sync.WaitGroup) {
	for m := range mutation {
		counter := atomic.AddUint64(&s.mutations, 1)
		if counter%100 == 0 {
			num := atomic.LoadUint64(&s.rdfs)
			dur := time.Since(s.start)
			rate := float64(num) / dur.Seconds()
			fmt.Printf("[Request: %6d] Total RDFs done: %8d RDFs per second: %7.0f\r", counter, num, rate)
		}
	RETRY:
		req, err := http.NewRequest("POST", *dgraph, strings.NewReader(body(m)))
		x.Check(err)
		res, err := hc.Do(req)
		if err != nil {
			fmt.Printf("Retrying req: %d. Error: %v\n", counter, err)
			time.Sleep(5 * time.Millisecond)
			goto RETRY
		}

		body, err := ioutil.ReadAll(res.Body)
		x.Check(err)
		if err = json.Unmarshal(body, &r); err != nil {
			// Not doing x.Checkf(json.Unmarshal..., "Response..", string(body))
			// to ensure that we don't convert body from []byte to string
			// when there are no errors.
			x.Checkf(err, "HTTP Status: %s Response body: %s.",
				http.StatusText(res.StatusCode), string(body))
		}
		if r.Code != "ErrorOk" {
			log.Fatalf("Error while performing mutation: %v, err: %v", m, r.Message)
		}
	}
	wg.Done()
}
// allocateUniqueUid returns an integer in the range [minIdx, maxIdx] owned by
// this instance (derived from instanceIdx and numInstances) which hasn't already
// been allocated to other xids. It does this by fingerprinting random bytes and
// retrying until the resulting uid maps to an empty posting list.
func allocateUniqueUid(instanceIdx uint64, numInstances uint64) uint64 {
	mod := math.MaxUint64 / numInstances
	minIdx := instanceIdx * mod

	buf := make([]byte, 128)
	for {
		_, err := rand.Read(buf)
		x.Checkf(err, "rand.Read shouldn't throw an error")

		uidb := farm.Fingerprint64(buf) // Generate from hash.
		uid := (uidb % mod) + minIdx
		if uid == math.MaxUint64 || !lmgr.isNew(uid) {
			continue
		}

		// Check if this uid has already been allocated.
		key := x.DataKey("_uid_", uid)
		pl, decr := posting.GetOrCreate(key)
		defer decr()

		if pl.Length(0) == 0 {
			return uid
		}
	}
	log.Fatalf("This shouldn't be reached.")
	return 0
}
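// A minimal, self-contained sketch of the range arithmetic allocateUniqueUid
// relies on. It is not Dgraph code: hash/fnv stands in for farm.Fingerprint64,
// and the names are made up. Each of numInstances servers owns a contiguous
// block of size MaxUint64/numInstances starting at instanceIdx*blockSize, and a
// hash is folded into that block with a modulo plus offset.
package main

import (
	"fmt"
	"hash/fnv"
	"math"
)

func uidInRange(data []byte, instanceIdx, numInstances uint64) uint64 {
	blockSize := math.MaxUint64 / numInstances
	minIdx := instanceIdx * blockSize

	h := fnv.New64a()
	h.Write(data)
	return (h.Sum64() % blockSize) + minIdx
}

func main() {
	uid := uidInRange([]byte("some-xid"), 2, 4)
	fmt.Printf("uid %#x lies in [%#x, %#x)\n",
		uid, 2*(math.MaxUint64/uint64(4)), 3*(math.MaxUint64/uint64(4)))
}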
func parsePeer(peer string) (uint64, string) {
	x.AssertTrue(len(peer) > 0)

	kv := strings.SplitN(peer, ":", 2)
	x.AssertTruef(len(kv) == 2, "Invalid peer format: %v", peer)
	pid, err := strconv.ParseUint(kv[0], 10, 64)
	x.Checkf(err, "Invalid peer id: %v", kv[0])
	// TODO: Validate the url kv[1]
	return pid, kv[1]
}
// StartRaftNodes will read the WAL dir, create the RAFT groups,
// and either start or restart RAFT nodes.
// This function triggers RAFT nodes to be created, and is the entrance to the RAFT
// world from main.go.
func StartRaftNodes(walDir string) {
	gr = new(groupi)
	gr.ctx, gr.cancel = context.WithCancel(context.Background())

	// Successfully connect with the peer, before doing anything else.
	if len(*peer) > 0 {
		_, paddr := parsePeer(*peer)
		pools().connect(paddr)

		// Force run syncMemberships with this peer, so our nodes know if they have other
		// servers who are serving the same groups. That way, they can talk to them
		// and try to join their clusters. Otherwise, they'll start off as a single-node
		// cluster.
		// IMPORTANT: Don't run any nodes until we have done at least one full sync for
		// membership information with the cluster. If you start this node too quickly,
		// just after starting the leader of group zero, that leader might not have updated
		// itself in the memberships; and hence this node would think that no one is handling
		// group zero. Therefore, we MUST wait to get past a last update raft index of zero.
		for gr.LastUpdate() == 0 {
			time.Sleep(time.Second)
			fmt.Println("Last update raft index for membership information is zero. Syncing...")
			gr.syncMemberships()
		}
		fmt.Printf("Last update is now: %d\n", gr.LastUpdate())
	}

	x.Checkf(os.MkdirAll(walDir, 0700), "Error while creating WAL dir.")
	wals, err := store.NewSyncStore(walDir)
	x.Checkf(err, "Error initializing wal store")
	gr.wal = raftwal.Init(wals, *raftId)

	if len(*myAddr) == 0 {
		*myAddr = fmt.Sprintf("localhost:%d", *workerPort)
	}

	for _, id := range strings.Split(*groupIds, ",") {
		gid, err := strconv.ParseUint(id, 0, 32)
		x.Checkf(err, "Unable to parse group id: %v", id)
		node := groups().newNode(uint32(gid), *raftId, *myAddr)
		go node.InitAndStartNode(gr.wal)
	}

	go gr.periodicSyncMemberships() // Now set it to be run periodically.
}
func (n *node) processSnapshot(s raftpb.Snapshot) {
	lead := n.raft.Status().Lead
	if lead == 0 {
		return
	}
	addr := n.peers.Get(lead)
	x.AssertTruef(addr != "", "Should have the leader address: %v", lead)
	pool := pools().get(addr)
	x.AssertTruef(pool != nil, "Leader: %d pool should not be nil", lead)

	_, err := populateShard(context.TODO(), pool, 0)
	x.Checkf(err, "processSnapshot")
}
func (n *node) joinPeers() {
	// Get leader information for MY group.
	pid, paddr := groups().Leader(n.gid)
	n.Connect(pid, paddr)
	fmt.Printf("Connected with: %v\n", paddr)

	addr := n.peers.Get(pid)
	pool := pools().get(addr)
	x.AssertTruef(pool != nil, "Unable to find addr for peer: %d", pid)

	// Bring the instance up to speed first.
	_, err := populateShard(n.ctx, pool, 0)
	x.Checkf(err, "Error while populating shard")

	conn, err := pool.Get()
	x.Check(err)
	defer pool.Put(conn)

	c := NewWorkerClient(conn)
	x.Printf("Calling JoinCluster")
	_, err = c.JoinCluster(n.ctx, n.raftContext)
	x.Checkf(err, "Error while joining cluster")
	x.Printf("Done with JoinCluster call\n")
}
func (l *List) commit() (committed bool, rerr error) {
	l.Lock()
	defer l.Unlock()

	if len(l.mlayer) == 0 {
		atomic.StoreInt64(&l.dirtyTs, 0)
		return false, nil
	}

	var final types.PostingList
	ubuf := make([]byte, 16)
	h := md5.New()
	count := 0
	l.iterate(0, func(p *types.Posting) bool {
		// Checksum code.
		n := binary.PutVarint(ubuf, int64(count))
		h.Write(ubuf[0:n])
		n = binary.PutUvarint(ubuf, p.Uid)
		h.Write(ubuf[0:n])
		h.Write(p.Value)
		h.Write([]byte(p.Label))
		count++

		// I think it's okay to take the pointer from the iterator, because we have a lock
		// over List; which won't be released until final has been marshalled. Thus, the
		// underlying data wouldn't be changed.
		final.Postings = append(final.Postings, p)
		return true
	})
	final.Checksum = h.Sum(nil)
	data, err := final.Marshal()
	x.Checkf(err, "Unable to marshal posting list")

	sw := l.StartWait()
	ce := commitEntry{
		key: l.key,
		val: data,
		sw:  sw,
	}
	commitCh <- ce

	// Now reset the mutation variables.
	atomic.StorePointer(&l.pbuffer, nil) // Make prev buffer eligible for GC.
	atomic.StoreInt64(&l.dirtyTs, 0)     // Set as clean.
	l.mlayer = l.mlayer[:0]
	l.lastCompact = time.Now()
	return true, nil
}
// processFile sends mutations for a given gz file.
func processFile(file string) {
	fmt.Printf("\nProcessing %s\n", file)
	f, err := os.Open(file)
	x.Check(err)
	defer f.Close()
	gr, err := gzip.NewReader(f)
	x.Check(err)

	hc = http.Client{Timeout: time.Minute}
	mutation := make(chan string, 3*(*concurrent))

	var wg sync.WaitGroup
	for i := 0; i < *concurrent; i++ {
		wg.Add(1)
		go makeRequests(mutation, &wg)
	}

	var buf bytes.Buffer
	bufReader := bufio.NewReader(gr)
	num := 0
	for {
		err = readLine(bufReader, &buf)
		if err != nil {
			break
		}
		buf.WriteRune('\n')
		atomic.AddUint64(&s.rdfs, 1)
		num++

		if num >= *numRdf {
			mutation <- buf.String()
			buf.Reset()
			num = 0
		}
	}
	if err != io.EOF {
		x.Checkf(err, "Error while reading file")
	}
	if buf.Len() > 0 {
		mutation <- buf.String()
	}
	close(mutation)
	wg.Wait()
}
func batchCommit() {
	var sz int
	var waits []*x.SafeWait
	var loop uint64

	b := pstore.NewWriteBatch()
	defer b.Destroy()

	for {
		select {
		case e := <-commitCh:
			b.Put(e.key, e.val)
			sz++
			waits = append(waits, e.sw)

		default:
			// default is executed if no other case is ready.
			start := time.Now()
			if sz > 0 {
				x.AssertTrue(b != nil)
				loop++
				fmt.Printf("[%4d] Writing batch of size: %v\n", loop, sz)
				x.Checkf(pstore.WriteBatch(b), "Error while writing to RocksDB.")
				for _, w := range waits {
					w.Done()
				}

				b.Clear()
				sz = 0
				waits = waits[:0]
			}
			// Add a sleep clause to avoid a busy wait loop if there's no input to commitCh.
			sleepFor := 10*time.Millisecond - time.Since(start)
			if sleepFor > time.Millisecond {
				time.Sleep(sleepFor)
			}
		}
	}
}
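// A minimal, self-contained sketch (not the Dgraph commit path; commitCh and the
// RocksDB write batch are replaced by made-up stand-ins) of the same loop shape
// as batchCommit: drain a channel into an in-memory batch, flush it in the
// default case, and sleep briefly so an idle loop doesn't spin.
package main

import (
	"fmt"
	"time"
)

func batcher(in <-chan string, done chan<- struct{}) {
	var batch []string
	for {
		select {
		case item, ok := <-in:
			if !ok {
				// Channel closed: flush whatever is left and stop.
				if len(batch) > 0 {
					fmt.Println("final flush:", batch)
				}
				close(done)
				return
			}
			batch = append(batch, item)
		default:
			if len(batch) > 0 {
				fmt.Println("flushing batch of size", len(batch))
				batch = batch[:0]
			}
			// Avoid a busy wait when there is no input.
			time.Sleep(10 * time.Millisecond)
		}
	}
}

func main() {
	in := make(chan string, 16)
	done := make(chan struct{})
	go batcher(in, done)
	for i := 0; i < 5; i++ {
		in <- fmt.Sprintf("item-%d", i)
	}
	close(in)
	<-done
}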
func queryHandler(w http.ResponseWriter, r *http.Request) {
	// Add a limit on how many pending queries can be run in the system.
	pendingQueries <- struct{}{}
	defer func() { <-pendingQueries }()

	addCorsHeaders(w)
	if r.Method == "OPTIONS" {
		return
	}
	if r.Method != "POST" {
		x.SetStatus(w, x.ErrorInvalidMethod, "Invalid method")
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	if rand.Float64() < *tracing {
		tr := trace.New("Dgraph", "Query")
		defer tr.Finish()
		ctx = trace.NewContext(ctx, tr)
	}

	var l query.Latency
	l.Start = time.Now()
	defer r.Body.Close()
	req, err := ioutil.ReadAll(r.Body)
	q := string(req)
	if err != nil || len(q) == 0 {
		x.TraceError(ctx, x.Wrapf(err, "Error while reading query"))
		x.SetStatus(w, x.ErrorInvalidRequest, "Invalid request encountered.")
		return
	}

	x.Trace(ctx, "Query received: %v", q)
	gq, mu, err := gql.Parse(q)
	if err != nil {
		x.TraceError(ctx, x.Wrapf(err, "Error while parsing query"))
		x.SetStatus(w, x.ErrorInvalidRequest, err.Error())
		return
	}

	var allocIds map[string]uint64
	var allocIdsStr map[string]string
	// If we have mutations, run them first.
	if mu != nil && (len(mu.Set) > 0 || len(mu.Del) > 0) {
		if allocIds, err = mutationHandler(ctx, mu); err != nil {
			x.TraceError(ctx, x.Wrapf(err, "Error while handling mutations"))
			x.SetStatus(w, x.Error, err.Error())
			return
		}
		// Convert the new UIDs to hex strings.
		allocIdsStr = make(map[string]string)
		for k, v := range allocIds {
			allocIdsStr[k] = fmt.Sprintf("%#x", v)
		}
	}

	if gq == nil || (gq.UID == 0 && gq.Func == nil && len(gq.XID) == 0) {
		mp := map[string]interface{}{
			"code":    x.ErrorOk,
			"message": "Done",
			"uids":    allocIdsStr,
		}
		if js, err := json.Marshal(mp); err == nil {
			w.Write(js)
		} else {
			x.SetStatus(w, "Error", "Unable to marshal map")
		}
		return
	}

	sg, err := query.ToSubGraph(ctx, gq)
	if err != nil {
		x.TraceError(ctx, x.Wrapf(err, "Error while converting to internal format"))
		x.SetStatus(w, x.ErrorInvalidRequest, err.Error())
		return
	}
	l.Parsing = time.Since(l.Start)
	x.Trace(ctx, "Query parsed")

	rch := make(chan error)
	go query.ProcessGraph(ctx, sg, nil, rch)
	err = <-rch
	if err != nil {
		x.TraceError(ctx, x.Wrapf(err, "Error while executing query"))
		x.SetStatus(w, x.Error, err.Error())
		return
	}
	l.Processing = time.Since(l.Start) - l.Parsing
	x.Trace(ctx, "Graph processed")

	if len(*dumpSubgraph) > 0 {
		x.Checkf(os.MkdirAll(*dumpSubgraph, 0700), *dumpSubgraph)
		s := time.Now().Format("20060102.150405.000000.gob")
		filename := path.Join(*dumpSubgraph, s)
		f, err := os.Create(filename)
		x.Checkf(err, filename)
		enc := gob.NewEncoder(f)
		x.Check(enc.Encode(sg))
		x.Checkf(f.Close(), filename)
	}

	js, err := sg.ToJSON(&l)
	if err != nil {
		x.TraceError(ctx, x.Wrapf(err, "Error while converting to JSON"))
		x.SetStatus(w, x.Error, err.Error())
		return
	}
	x.Trace(ctx, "Latencies: Total: %v Parsing: %v Process: %v Json: %v",
		time.Since(l.Start), l.Parsing, l.Processing, l.Json)

	w.Header().Set("Content-Type", "application/json")
	w.Write(js)
}
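// A minimal, self-contained sketch of the bounded-concurrency trick at the top of
// queryHandler: a buffered channel used as a counting semaphore. maxPending and
// every other name here are illustrative, not Dgraph's.
package main

import (
	"fmt"
	"sync"
	"time"
)

const maxPending = 3

var pending = make(chan struct{}, maxPending)

func handle(id int, wg *sync.WaitGroup) {
	defer wg.Done()
	pending <- struct{}{}        // acquire a slot; blocks once maxPending handlers are running
	defer func() { <-pending }() // release the slot on every return path
	fmt.Println("handling request", id)
	time.Sleep(10 * time.Millisecond) // stand-in for real work
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go handle(i, &wg)
	}
	wg.Wait()
}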