func addReverseEdges(ctx context.Context, gs graphstore.Service) error { log.Println("Adding reverse edges") var ( totalEntries int addedEdges int ) startTime := time.Now() err := gs.Scan(ctx, new(spb.ScanRequest), func(entry *spb.Entry) error { kind := entry.EdgeKind if kind != "" && schema.EdgeDirection(kind) == schema.Forward { if err := gs.Write(ctx, &spb.WriteRequest{ Source: entry.Target, Update: []*spb.WriteRequest_Update{{ Target: entry.Source, EdgeKind: schema.MirrorEdge(kind), FactName: entry.FactName, FactValue: entry.FactValue, }}, }); err != nil { return fmt.Errorf("Failed to write reverse edge: %v", err) } addedEdges++ } totalEntries++ return nil }) log.Printf("Wrote %d reverse edges to GraphStore (%d total entries): %v", addedEdges, totalEntries, time.Since(startTime)) return err }
// EnsureReverseEdges checks if gs contains reverse edges. If it doesn't, it // will scan gs for all forward edges, adding a reverse for each back into the // GraphStore. This is necessary for a GraphStoreService to work properly. func EnsureReverseEdges(ctx context.Context, gs graphstore.Service) error { var edge *spb.Entry if err := gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error { if graphstore.IsEdge(e) { edge = e return io.EOF } return nil }); err != nil { return err } if edge == nil { log.Println("No edges found in GraphStore") return nil } else if schema.EdgeDirection(edge.EdgeKind) == schema.Reverse { return nil } var foundReverse bool if err := gs.Read(ctx, &spb.ReadRequest{ Source: edge.Target, EdgeKind: schema.MirrorEdge(edge.EdgeKind), }, func(entry *spb.Entry) error { foundReverse = true return nil }); err != nil { return fmt.Errorf("error checking for reverse edge: %v", err) } if foundReverse { return nil } return addReverseEdges(ctx, gs) }
func writeEntries(ctx context.Context, s graphstore.Service, reqs <-chan *spb.WriteRequest) (uint64, error) { var num uint64 for req := range reqs { num += uint64(len(req.Update)) if err := s.Write(ctx, req); err != nil { return 0, err } } return num, nil }
func readEntries(ctx context.Context, gs graphstore.Service, entryFunc graphstore.EntryFunc, edgeKind string, tickets []string) error { for _, ticket := range tickets { src, err := kytheuri.ToVName(ticket) if err != nil { return fmt.Errorf("error parsing ticket %q: %v", ticket, err) } if err := gs.Read(ctx, &spb.ReadRequest{ Source: src, EdgeKind: edgeKind, }, entryFunc); err != nil { return fmt.Errorf("GraphStore Read error for ticket %q: %v", ticket, err) } } return nil }
// getEdges returns edgeTargets with the given node as their source. Only edge // entries that return true when applied to pred are returned. func getEdges(ctx context.Context, gs graphstore.Service, node *spb.VName, pred func(*spb.Entry) bool) ([]*edgeTarget, error) { var targets []*edgeTarget if err := gs.Read(ctx, &spb.ReadRequest{ Source: node, EdgeKind: "*", }, func(entry *spb.Entry) error { if graphstore.IsEdge(entry) && pred(entry) { targets = append(targets, &edgeTarget{entry.EdgeKind, entry.Target}) } return nil }); err != nil { return nil, fmt.Errorf("read error: %v", err) } return targets, nil }
// Populate adds each file node in gs to m. func (m *Map) Populate(ctx context.Context, gs graphstore.Service) error { start := time.Now() log.Println("Populating in-memory file tree") var total int if err := gs.Scan(ctx, &spb.ScanRequest{FactPrefix: schema.NodeKindFact}, func(entry *spb.Entry) error { if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == schema.FileKind { m.AddFile(entry.Source) total++ } return nil }); err != nil { return fmt.Errorf("failed to Scan GraphStore for directory structure: %v", err) } log.Printf("Indexed %d files in %s", total, time.Since(start)) return nil }
func scanEntries(ctx context.Context, gs graphstore.Service, entryFunc graphstore.EntryFunc, edgeKind, targetTicket, factPrefix string) error { var target *spb.VName var err error if targetTicket != "" { target, err = kytheuri.ToVName(targetTicket) if err != nil { return fmt.Errorf("error parsing --target %q: %v", targetTicket, err) } } if err := gs.Scan(ctx, &spb.ScanRequest{ EdgeKind: edgeKind, FactPrefix: factPrefix, Target: target, }, entryFunc); err != nil { return fmt.Errorf("GraphStore Scan error: %v", err) } return nil }
func writeWithReverses(ctx context.Context, gs graphstore.Service, req *spb.WriteRequest) error { if err := gs.Write(ctx, req); err != nil { return fmt.Errorf("error writing edges: %v", err) } for _, u := range req.Update { if err := gs.Write(ctx, &spb.WriteRequest{ Source: u.Target, Update: []*spb.WriteRequest_Update{{ Target: req.Source, EdgeKind: schema.MirrorEdge(u.EdgeKind), FactName: u.FactName, FactValue: u.FactValue, }}, }); err != nil { return fmt.Errorf("error writing rev edge: %v", err) } } return nil }
func getSourceText(ctx context.Context, gs graphstore.Service, fileVName *spb.VName) (text []byte, encoding string, err error) { if err := gs.Read(ctx, &spb.ReadRequest{Source: fileVName}, func(entry *spb.Entry) error { switch entry.FactName { case schema.TextFact: text = entry.FactValue case schema.TextEncodingFact: encoding = string(entry.FactValue) default: // skip other file facts } return nil }); err != nil { return nil, "", fmt.Errorf("read error: %v", err) } if text == nil { err = fmt.Errorf("file not found: %+v", fileVName) } return }
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
//
// It scans every entry in gs in one goroutine and fans the entries out to
// concurrent writers for the file tree, nodes, edges and search index. File
// decorations are written afterwards, once the node and edge writers are done.
// The first non-nil error is returned, checked in the order: edges, nodes,
// decorations, file tree, search index, scan.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB) error {
	log.Println("Starting serving pipeline")

	tbl := &table.KVProto{db}

	// TODO(schroederc): for large corpora, this won't fit in memory
	var files []string

	entries := make(chan *spb.Entry)
	ftIn, nIn, eIn := make(chan *spb.VName), make(chan *spb.Entry), make(chan *spb.Entry)
	// Dispatcher: entries with an empty EdgeKind go to the node writer (and,
	// for "file" nodes, also to the file tree writer and the files list); all
	// other entries go to the edge writer. The downstream channels are closed
	// once entries is drained.
	go func() {
		for entry := range entries {
			if entry.EdgeKind == "" {
				nIn <- entry
				if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == "file" {
					ftIn <- entry.Source
					files = append(files, kytheuri.ToString(entry.Source))
				}
			} else {
				eIn <- entry
			}
		}
		close(ftIn)
		close(nIn)
		close(eIn)
	}()
	log.Println("Scanning GraphStore")
	var sErr error
	// Producer: feeds every scanned entry to the dispatcher and closes entries
	// when the scan returns, whether or not it failed.
	go func() {
		sErr = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
			entries <- e
			return nil
		})
		close(entries)
	}()

	var (
		ftErr, nErr, eErr error
		ftWG, edgeNodeWG  sync.WaitGroup
	)
	ftWG.Add(1)
	go func() {
		defer ftWG.Done()
		ftErr = writeFileTree(ctx, tbl, ftIn)
		log.Println("Wrote FileTree")
	}()
	edgeNodeWG.Add(2)
	// nodes carries the output of writeNodes to writeIndex below.
	nodes := make(chan *srvpb.Node)
	go func() {
		defer edgeNodeWG.Done()
		nErr = writeNodes(tbl, nIn, nodes)
		log.Println("Wrote Nodes")
	}()
	go func() {
		defer edgeNodeWG.Done()
		eErr = writeEdges(ctx, tbl, eIn)
		log.Println("Wrote Edges")
	}()

	var (
		idxWG  sync.WaitGroup
		idxErr error
	)
	idxWG.Add(1)
	go func() {
		defer idxWG.Done()
		idxErr = writeIndex(&table.KVInverted{db}, nodes)
		log.Println("Wrote Search Index")
	}()

	// NOTE(review): the early returns below do not wait for the file tree,
	// index, scan or dispatcher goroutines; if a writer stopped consuming its
	// channel they may stay blocked on unbuffered sends — confirm.
	edgeNodeWG.Wait()
	if eErr != nil {
		return eErr
	} else if nErr != nil {
		return nErr
	}

	// Decorations are written from the tables populated above, using the list
	// of file tickets gathered by the dispatcher.
	es := xrefs.NodesEdgesService(&xsrv.Table{tbl})
	if err := writeDecorations(ctx, tbl, es, files); err != nil {
		return err
	}

	ftWG.Wait()
	if ftErr != nil {
		return ftErr
	}
	idxWG.Wait()
	if idxErr != nil {
		return idxErr
	}
	return sErr
}
func writeEdgePages(ctx context.Context, t table.Proto, gs graphstore.Service) error { // TODO(schroederc): spill large PagedEdgeSets into EdgePages log.Println("Writing EdgeSets") var ( lastSrc *spb.VName pes *srvpb.PagedEdgeSet grp *srvpb.EdgeSet_Group pesTotal int ) if err := gs.Scan(ctx, new(spb.ScanRequest), func(e *spb.Entry) error { if e.EdgeKind == "" { panic("non-edge entry") } if pes != nil && !compare.VNamesEqual(lastSrc, e.Source) { if grp != nil { pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp) pesTotal += len(grp.TargetTicket) } pes.TotalEdges = int32(pesTotal) if err := t.Put(xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil { return err } pes = nil grp = nil pesTotal = 0 } if pes == nil { pes = &srvpb.PagedEdgeSet{ EdgeSet: &srvpb.EdgeSet{ SourceTicket: kytheuri.ToString(e.Source), }, } } if grp != nil && grp.Kind != e.EdgeKind { pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp) pesTotal += len(grp.TargetTicket) grp = nil } if grp == nil { grp = &srvpb.EdgeSet_Group{ Kind: e.EdgeKind, } } grp.TargetTicket = append(grp.TargetTicket, kytheuri.ToString(e.Target)) lastSrc = e.Source return nil }); err != nil { return err } if pes != nil { if grp != nil { pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp) pesTotal += len(grp.TargetTicket) } pes.TotalEdges = int32(pesTotal) if err := t.Put(xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil { return err } } return nil }
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
//
// A nil opts is replaced by a zero-valued Options. The work proceeds in two
// phases: first the node facts and forward edges scanned from gs are fed to
// combineNodesAndEdges, which yields a sorted edge collection; then that
// collection is read back and every edge is sent to both the paged-edge writer
// and the decoration/reference writer, which run concurrently.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")

	out := &servingOutput{
		xs:  table.ProtoBatchParallel{&table.KVProto{DB: db}},
		idx: &table.KVInverted{DB: db},
	}
	entries := make(chan *spb.Entry, chBuf)

	var cErr error
	var wg sync.WaitGroup
	var sortedEdges disksort.Interface
	wg.Add(1)
	// Phase 1 consumer: sortedEdges and cErr are only read after wg.Wait.
	go func() {
		sortedEdges, cErr = combineNodesAndEdges(ctx, opts, out, entries)
		if cErr != nil {
			cErr = fmt.Errorf("error combining nodes and edges: %v", cErr)
		}
		wg.Done()
	}()

	// Phase 1 producer: only node facts and forward edges are forwarded; other
	// entries are dropped.
	err := gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
		if graphstore.IsNodeFact(e) || schema.EdgeDirection(e.EdgeKind) == schema.Forward {
			entries <- e
		}
		return nil
	})
	// entries is closed before the error check so the consumer can finish even
	// when the scan failed.
	close(entries)
	if err != nil {
		return fmt.Errorf("error scanning GraphStore: %v", err)
	}

	wg.Wait()
	if cErr != nil {
		return cErr
	}

	pesIn, dIn := make(chan *srvpb.Edge, chBuf), make(chan *srvpb.Edge, chBuf)
	var pErr, fErr error
	wg.Add(2)
	go func() {
		defer wg.Done()
		if err := writePagedEdges(ctx, pesIn, out.xs, opts); err != nil {
			pErr = fmt.Errorf("error writing paged edge sets: %v", err)
		}
	}()
	go func() {
		defer wg.Done()
		if err := writeDecorAndRefs(ctx, opts, dIn, out); err != nil {
			fErr = fmt.Errorf("error writing file decorations: %v", err)
		}
	}()

	// Phase 2 producer: each edge is delivered to both writers.
	// NOTE(review): if either writer returns early without draining its
	// channel, these sends could block once the buffer fills — confirm.
	err = sortedEdges.Read(func(x interface{}) error {
		e := x.(*srvpb.Edge)
		pesIn <- e
		dIn <- e
		return nil
	})
	close(pesIn)
	close(dIn)
	if err != nil {
		return fmt.Errorf("error reading edges table: %v", err)
	}

	wg.Wait()
	if pErr != nil {
		return pErr
	}
	return fErr
}
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
//
// A nil opts is replaced by a zero-valued Options. Every entry scanned from gs
// is fanned out to concurrent writers for the file tree, nodes, edges,
// decorations and search index. The first non-nil error is returned, checked
// in the order: edges, nodes, decorations, file tree, search index, scan.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")

	tbl := table.ProtoBatchParallel{&table.KVProto{db}}

	entries := make(chan *spb.Entry, chBuf)
	ftIn := make(chan *spb.VName, chBuf)
	nIn, eIn, dIn := make(chan *spb.Entry, chBuf), make(chan *spb.Entry, chBuf), make(chan *spb.Entry, chBuf)
	// Dispatcher: node facts go to the node writer (and the sources of "file"
	// nodes to the file tree writer), everything else goes to the edge writer,
	// and every entry also goes to the decoration writer. The downstream
	// channels are closed once entries is drained.
	go func() {
		for entry := range entries {
			if graphstore.IsNodeFact(entry) {
				nIn <- entry
				if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == "file" {
					ftIn <- entry.Source
				}
			} else {
				eIn <- entry
			}
			dIn <- entry
		}
		close(ftIn)
		close(nIn)
		close(eIn)
		close(dIn)
	}()
	log.Println("Scanning GraphStore")
	var sErr error
	// Producer: feeds every scanned entry to the dispatcher and closes entries
	// when the scan returns, whether or not it failed.
	go func() {
		sErr = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
			entries <- e
			return nil
		})
		if sErr != nil {
			sErr = fmt.Errorf("error scanning GraphStore: %v", sErr)
		}
		close(entries)
	}()

	var (
		ftErr, nErr, eErr, dErr error
		ftWG, xrefsWG           sync.WaitGroup
	)
	ftWG.Add(1)
	go func() {
		defer ftWG.Done()
		ftErr = writeFileTree(ctx, tbl, ftIn)
		if ftErr != nil {
			ftErr = fmt.Errorf("error writing FileTree: %v", ftErr)
		} else {
			log.Println("Wrote FileTree")
		}
	}()
	xrefsWG.Add(3)
	// nodes carries the output of writeNodes to writeIndex below.
	nodes := make(chan *srvpb.Node)
	go func() {
		defer xrefsWG.Done()
		nErr = writeNodes(ctx, tbl, nIn, nodes)
		if nErr != nil {
			nErr = fmt.Errorf("error writing Nodes: %v", nErr)
		} else {
			log.Println("Wrote Nodes")
		}
	}()
	go func() {
		defer xrefsWG.Done()
		eErr = writeEdges(ctx, tbl, eIn, opts.MaxEdgePageSize)
		if eErr != nil {
			eErr = fmt.Errorf("error writing Edges: %v", eErr)
		} else {
			log.Println("Wrote Edges")
		}
	}()
	go func() {
		defer xrefsWG.Done()
		dErr = writeDecorations(ctx, tbl, dIn)
		if dErr != nil {
			dErr = fmt.Errorf("error writing FileDecorations: %v", dErr)
		} else {
			log.Println("Wrote Decorations")
		}
	}()

	var (
		idxWG  sync.WaitGroup
		idxErr error
	)
	idxWG.Add(1)
	go func() {
		defer idxWG.Done()
		idxErr = writeIndex(ctx, &table.KVInverted{db}, nodes)
		if idxErr != nil {
			idxErr = fmt.Errorf("error writing Search Index: %v", idxErr)
		} else {
			log.Println("Wrote Search Index")
		}
	}()

	// NOTE(review): the early returns below do not wait for the file tree,
	// index, scan or dispatcher goroutines; if a writer stopped consuming its
	// channel they may stay blocked on sends once buffers fill — confirm.
	xrefsWG.Wait()
	if eErr != nil {
		return eErr
	} else if nErr != nil {
		return nErr
	} else if dErr != nil {
		return dErr
	}

	ftWG.Wait()
	if ftErr != nil {
		return ftErr
	}
	idxWG.Wait()
	if idxErr != nil {
		return idxErr
	}
	return sErr
}
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
//
// A nil opts is replaced by a zero-valued Options. The work proceeds in two
// phases: first every entry scanned from gs is fed to combineNodesAndEdges,
// which writes into out (including a temporary "complete.edges" table); then
// the completed edges are read back and handed to the paged-edge writer and
// the file-decoration writer, which run concurrently.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")

	edges, err := tempTable("complete.edges")
	if err != nil {
		return err
	}
	// A failure to close the temporary table is only logged; it does not
	// change Run's result.
	defer func() {
		if err := edges.Close(); err != nil {
			log.Printf("Error closing edges table: %v", err)
		}
	}()

	out := &servingOutput{
		xs:  table.ProtoBatchParallel{&table.KVProto{db}},
		idx: &table.KVInverted{db},

		completeEdges: &table.KVProto{edges},
	}
	entries := make(chan *spb.Entry, chBuf)

	var cErr error
	var wg sync.WaitGroup
	wg.Add(1)
	// Phase 1 consumer: cErr is only read after wg.Wait.
	go func() {
		cErr = combineNodesAndEdges(ctx, out, entries)
		if cErr != nil {
			cErr = fmt.Errorf("error combining nodes and edges: %v", cErr)
		}
		wg.Done()
	}()

	err = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
		entries <- e
		return nil
	})
	// entries is closed before the error check so the consumer can finish even
	// when the scan failed.
	close(entries)
	if err != nil {
		return fmt.Errorf("error scanning GraphStore: %v", err)
	}

	wg.Wait()
	if cErr != nil {
		return cErr
	}

	pesIn, dIn := make(chan *srvpb.Edge, chBuf), make(chan *srvpb.Edge, chBuf)
	var pErr, fErr error
	wg.Add(2)
	go func() {
		defer wg.Done()
		if err := writePagedEdges(ctx, pesIn, out.xs, opts.MaxEdgePageSize); err != nil {
			pErr = fmt.Errorf("error writing paged edge sets: %v", err)
		}
	}()
	go func() {
		defer wg.Done()
		if err := writeFileDecorations(ctx, dIn, out); err != nil {
			fErr = fmt.Errorf("error writing file decorations: %v", err)
		}
	}()

	// NOTE(review): pesIn and dIn are never closed in this function;
	// presumably readCompletedEdges closes them when it finishes — confirm,
	// otherwise the writers above and wg.Wait below would not return.
	if err := readCompletedEdges(ctx, out.completeEdges, pesIn, dIn); err != nil {
		return fmt.Errorf("error reading edges table: %v", err)
	}

	wg.Wait()
	if pErr != nil {
		return pErr
	}
	return fErr
}
// LogClose closes gs and logs any resulting error. func LogClose(ctx context.Context, gs graphstore.Service) { if err := gs.Close(ctx); err != nil { log.Printf("GraphStore failed to close: %v", err) } }