Esempio n. 1
0
File: xrefs.go Progetto: bzz/kythe
// addReverseEdges scans every entry in gs and, for each entry whose edge kind
// is a forward edge, writes the mirrored edge (source and target swapped, kind
// mirrored, fact name/value copied) back into gs.
//
// The scan stops at the first failed write and that error is returned.  The
// summary line is logged whether or not the scan succeeded.
func addReverseEdges(ctx context.Context, gs graphstore.Service) error {
	log.Println("Adding reverse edges")

	scanned, mirrored := 0, 0
	began := time.Now()

	// mirror handles a single scanned entry; counters are only advanced for
	// entries that were processed without error.
	mirror := func(e *spb.Entry) error {
		k := e.EdgeKind
		if k == "" || schema.EdgeDirection(k) != schema.Forward {
			scanned++
			return nil
		}

		reverse := &spb.WriteRequest{
			Source: e.Target,
			Update: []*spb.WriteRequest_Update{{
				Target:    e.Source,
				EdgeKind:  schema.MirrorEdge(k),
				FactName:  e.FactName,
				FactValue: e.FactValue,
			}},
		}
		if werr := gs.Write(ctx, reverse); werr != nil {
			return fmt.Errorf("Failed to write reverse edge: %v", werr)
		}
		mirrored++
		scanned++
		return nil
	}

	scanErr := gs.Scan(ctx, new(spb.ScanRequest), mirror)
	log.Printf("Wrote %d reverse edges to GraphStore (%d total entries): %v", mirrored, scanned, time.Since(began))
	return scanErr
}
Esempio n. 2
0
File: xrefs.go Progetto: bzz/kythe
// EnsureReverseEdges checks if gs contains reverse edges.  If it doesn't, it
// will scan gs for all forward edges, adding a reverse for each back into the
// GraphStore.  This is necessary for a GraphStoreService to work properly.
//
// The check is a sample, not a full audit: only the first edge found by a scan
// is examined.  If that edge is itself a reverse edge, or its mirror can be
// read back from gs, the store is treated as already having reverse edges.
func EnsureReverseEdges(ctx context.Context, gs graphstore.Service) error {
	// Locate the first edge entry.  io.EOF is returned to cut the scan short.
	// NOTE(review): this relies on Scan not surfacing the callback's io.EOF as
	// its own error -- confirm against the graphstore.Service contract.
	var first *spb.Entry
	findEdge := func(entry *spb.Entry) error {
		if !graphstore.IsEdge(entry) {
			return nil
		}
		first = entry
		return io.EOF
	}
	if err := gs.Scan(ctx, &spb.ScanRequest{}, findEdge); err != nil {
		return err
	}

	switch {
	case first == nil:
		log.Println("No edges found in GraphStore")
		return nil
	case schema.EdgeDirection(first.EdgeKind) == schema.Reverse:
		return nil
	}

	// The sampled edge is not a reverse edge: look for its mirror.
	mirrorReq := &spb.ReadRequest{
		Source:   first.Target,
		EdgeKind: schema.MirrorEdge(first.EdgeKind),
	}
	hasMirror := false
	err := gs.Read(ctx, mirrorReq, func(*spb.Entry) error {
		hasMirror = true
		return nil
	})
	if err != nil {
		return fmt.Errorf("error checking for reverse edge: %v", err)
	}
	if hasMirror {
		return nil
	}
	return addReverseEdges(ctx, gs)
}
Esempio n. 3
0
// writeEntries drains reqs, writing each request to s, and returns the total
// number of updates contained in the requests once the channel is closed.  If
// any write fails, the count is discarded and (0, err) is returned
// immediately; remaining requests are left unread on the channel.
func writeEntries(ctx context.Context, s graphstore.Service, reqs <-chan *spb.WriteRequest) (uint64, error) {
	var updates uint64
	for {
		req, open := <-reqs
		if !open {
			return updates, nil
		}
		updates += uint64(len(req.Update))
		if err := s.Write(ctx, req); err != nil {
			return 0, err
		}
	}
}
Esempio n. 4
0
// readEntries parses each ticket into a VName and reads its entries of the
// given edgeKind from gs, passing every entry to entryFunc.  Tickets are
// processed in order; the first parse or read failure aborts the loop and is
// returned with the offending ticket in the message.
func readEntries(ctx context.Context, gs graphstore.Service, entryFunc graphstore.EntryFunc, edgeKind string, tickets []string) error {
	for i := range tickets {
		ticket := tickets[i]

		source, err := kytheuri.ToVName(ticket)
		if err != nil {
			return fmt.Errorf("error parsing ticket %q: %v", ticket, err)
		}

		req := &spb.ReadRequest{
			Source:   source,
			EdgeKind: edgeKind,
		}
		if err = gs.Read(ctx, req, entryFunc); err != nil {
			return fmt.Errorf("GraphStore Read error for ticket %q: %v", ticket, err)
		}
	}
	return nil
}
Esempio n. 5
0
// getEdges reads every entry whose source is node (edge kind "*") and returns
// an edgeTarget for each edge entry accepted by pred.  pred is only consulted
// for entries that graphstore.IsEdge reports as edges.  On a read failure the
// partial result is dropped and a wrapped error is returned.
func getEdges(ctx context.Context, gs graphstore.Service, node *spb.VName, pred func(*spb.Entry) bool) ([]*edgeTarget, error) {
	var matched []*edgeTarget

	collect := func(e *spb.Entry) error {
		if !graphstore.IsEdge(e) || !pred(e) {
			return nil
		}
		matched = append(matched, &edgeTarget{e.EdgeKind, e.Target})
		return nil
	}

	req := &spb.ReadRequest{
		Source:   node,
		EdgeKind: "*",
	}
	if err := gs.Read(ctx, req, collect); err != nil {
		return nil, fmt.Errorf("read error: %v", err)
	}
	return matched, nil
}
Esempio n. 6
0
// Populate scans gs for node-kind facts and adds the source of every entry
// whose kind is schema.FileKind to m.  It logs the number of files added and
// the elapsed time on success; a scan failure is returned wrapped.
func (m *Map) Populate(ctx context.Context, gs graphstore.Service) error {
	began := time.Now()
	log.Println("Populating in-memory file tree")

	fileCount := 0
	addIfFile := func(e *spb.Entry) error {
		// The scan is restricted by fact prefix only, so the exact fact name is
		// re-checked here before looking at the value.
		isFile := e.FactName == schema.NodeKindFact && string(e.FactValue) == schema.FileKind
		if isFile {
			m.AddFile(e.Source)
			fileCount++
		}
		return nil
	}

	req := &spb.ScanRequest{FactPrefix: schema.NodeKindFact}
	if err := gs.Scan(ctx, req, addIfFile); err != nil {
		return fmt.Errorf("failed to Scan GraphStore for directory structure: %v", err)
	}

	log.Printf("Indexed %d files in %s", fileCount, time.Since(began))
	return nil
}
Esempio n. 7
0
// scanEntries scans gs with the given edgeKind and factPrefix filters, passing
// each matching entry to entryFunc.  When targetTicket is non-empty it is
// parsed into a VName and used as the scan's Target; otherwise the Target is
// left nil.  Parse and scan failures are returned wrapped.
func scanEntries(ctx context.Context, gs graphstore.Service, entryFunc graphstore.EntryFunc, edgeKind, targetTicket, factPrefix string) error {
	req := &spb.ScanRequest{
		EdgeKind:   edgeKind,
		FactPrefix: factPrefix,
	}

	if targetTicket != "" {
		vname, err := kytheuri.ToVName(targetTicket)
		if err != nil {
			return fmt.Errorf("error parsing --target %q: %v", targetTicket, err)
		}
		req.Target = vname
	}

	if err := gs.Scan(ctx, req, entryFunc); err != nil {
		return fmt.Errorf("GraphStore Scan error: %v", err)
	}
	return nil
}
Esempio n. 8
0
// writeWithReverses writes req to gs and then, for every update in req, issues
// a separate write of the mirrored edge: the update's target becomes the
// source, req.Source becomes the target, and the edge kind is mirrored.  The
// first failing write aborts the sequence; earlier writes are not undone.
func writeWithReverses(ctx context.Context, gs graphstore.Service, req *spb.WriteRequest) error {
	if err := gs.Write(ctx, req); err != nil {
		return fmt.Errorf("error writing edges: %v", err)
	}

	updates := req.Update
	for i := range updates {
		fwd := updates[i]
		reverse := &spb.WriteRequest{
			Source: fwd.Target,
			Update: []*spb.WriteRequest_Update{{
				Target:    req.Source,
				EdgeKind:  schema.MirrorEdge(fwd.EdgeKind),
				FactName:  fwd.FactName,
				FactValue: fwd.FactValue,
			}},
		}
		if err := gs.Write(ctx, reverse); err != nil {
			return fmt.Errorf("error writing rev edge: %v", err)
		}
	}
	return nil
}
Esempio n. 9
0
File: xrefs.go Progetto: bzz/kythe
// getSourceText reads the entries of fileVName from gs and returns the value
// of its text fact together with its text-encoding fact (as a string).  All
// other facts are ignored.  A read failure yields (nil, "", err); if no text
// fact was seen (text is nil) a "file not found" error is returned alongside
// whatever encoding was read.
func getSourceText(ctx context.Context, gs graphstore.Service, fileVName *spb.VName) (text []byte, encoding string, err error) {
	collect := func(e *spb.Entry) error {
		if e.FactName == schema.TextFact {
			text = e.FactValue
		} else if e.FactName == schema.TextEncodingFact {
			encoding = string(e.FactValue)
		}
		// Any other file fact is skipped.
		return nil
	}

	req := &spb.ReadRequest{Source: fileVName}
	if readErr := gs.Read(ctx, req, collect); readErr != nil {
		return nil, "", fmt.Errorf("read error: %v", readErr)
	}

	if text == nil {
		return text, encoding, fmt.Errorf("file not found: %+v", fileVName)
	}
	return text, encoding, nil
}
Esempio n. 10
0
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
//
// The GraphStore is scanned once in a background goroutine and each entry is
// routed over unbuffered channels to the node, edge, and file-tree writers.
// File decorations are written only after the node and edge writers have
// finished.  The first non-nil error is returned, checked in this order:
// edges, nodes, decorations, file tree, search index, scan.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB) error {
	log.Println("Starting serving pipeline")
	tbl := &table.KVProto{db}

	// TODO(schroederc): for large corpora, this won't fit in memory
	var files []string

	entries := make(chan *spb.Entry)
	ftIn, nIn, eIn := make(chan *spb.VName), make(chan *spb.Entry), make(chan *spb.Entry)
	// Router: entries without an edge kind go to the node writer (and, for
	// node-kind facts with value "file", to the file-tree writer and the files
	// list); everything else goes to the edge writer.  The downstream channels
	// are closed once entries has been drained.
	go func() {
		for entry := range entries {
			if entry.EdgeKind == "" {
				nIn <- entry
				if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == "file" {
					ftIn <- entry.Source
					files = append(files, kytheuri.ToString(entry.Source))
				}
			} else {
				eIn <- entry
			}
		}
		close(ftIn)
		close(nIn)
		close(eIn)
	}()
	log.Println("Scanning GraphStore")
	var sErr error
	// Producer: feeds every scanned entry to the router.  sErr is assigned
	// before entries is closed.
	go func() {
		sErr = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
			entries <- e
			return nil
		})
		close(entries)
	}()

	var (
		ftErr, nErr, eErr error
		ftWG, edgeNodeWG  sync.WaitGroup
	)
	ftWG.Add(1)
	go func() {
		defer ftWG.Done()
		ftErr = writeFileTree(ctx, tbl, ftIn)
		// Logged regardless of whether ftErr is nil.
		log.Println("Wrote FileTree")
	}()
	edgeNodeWG.Add(2)
	// nodes connects writeNodes to writeIndex below.
	// NOTE(review): presumably writeNodes sends on and closes this channel and
	// writeIndex consumes it -- confirm against those helpers.
	nodes := make(chan *srvpb.Node)
	go func() {
		defer edgeNodeWG.Done()
		nErr = writeNodes(tbl, nIn, nodes)
		log.Println("Wrote Nodes")
	}()
	go func() {
		defer edgeNodeWG.Done()
		eErr = writeEdges(ctx, tbl, eIn)
		log.Println("Wrote Edges")
	}()

	var (
		idxWG  sync.WaitGroup
		idxErr error
	)
	idxWG.Add(1)
	go func() {
		defer idxWG.Done()
		idxErr = writeIndex(&table.KVInverted{db}, nodes)
		log.Println("Wrote Search Index")
	}()

	// Decorations are built from the node/edge tables, so wait for both
	// writers before continuing.
	// NOTE(review): the early returns below do not wait on ftWG or idxWG, so
	// those goroutines may still be running when Run returns.
	edgeNodeWG.Wait()
	if eErr != nil {
		return eErr
	} else if nErr != nil {
		return nErr
	}

	es := xrefs.NodesEdgesService(&xsrv.Table{tbl})
	if err := writeDecorations(ctx, tbl, es, files); err != nil {
		return err
	}

	ftWG.Wait()
	if ftErr != nil {
		return ftErr
	}
	idxWG.Wait()
	if idxErr != nil {
		return idxErr
	}

	// A scan error is only reported when every other stage succeeded.
	return sErr
}
Esempio n. 11
0
// writeEdgePages scans gs and writes one PagedEdgeSet per distinct source to
// t, keyed by xsrv.EdgeSetKey of the source ticket.  Consecutive entries with
// the same source are collected into one set; within a set, consecutive
// entries with the same edge kind share one group.  Grouping is therefore by
// adjacency in scan order.  It panics if the scan yields an entry with an
// empty edge kind.
func writeEdgePages(ctx context.Context, t table.Proto, gs graphstore.Service) error {
	// TODO(schroederc): spill large PagedEdgeSets into EdgePages
	log.Println("Writing EdgeSets")

	var (
		prevSource *spb.VName
		current    *srvpb.PagedEdgeSet
		group      *srvpb.EdgeSet_Group
		edgeCount  int
	)

	// closeGroup moves the in-progress group, if any, into the current set and
	// adds its targets to the running edge count.
	closeGroup := func() {
		if group == nil {
			return
		}
		current.EdgeSet.Group = append(current.EdgeSet.Group, group)
		edgeCount += len(group.TargetTicket)
	}

	// flush finalizes the current set and stores it in t.
	flush := func() error {
		closeGroup()
		current.TotalEdges = int32(edgeCount)
		return t.Put(xsrv.EdgeSetKey(current.EdgeSet.SourceTicket), current)
	}

	scanErr := gs.Scan(ctx, new(spb.ScanRequest), func(entry *spb.Entry) error {
		if entry.EdgeKind == "" {
			panic("non-edge entry")
		}

		// A new source finishes the set that was being built.
		if current != nil && !compare.VNamesEqual(prevSource, entry.Source) {
			if err := flush(); err != nil {
				return err
			}
			current, group, edgeCount = nil, nil, 0
		}
		if current == nil {
			current = &srvpb.PagedEdgeSet{
				EdgeSet: &srvpb.EdgeSet{
					SourceTicket: kytheuri.ToString(entry.Source),
				},
			}
		}

		// A new edge kind finishes the group that was being built.
		if group != nil && group.Kind != entry.EdgeKind {
			closeGroup()
			group = nil
		}
		if group == nil {
			group = &srvpb.EdgeSet_Group{
				Kind: entry.EdgeKind,
			}
		}

		group.TargetTicket = append(group.TargetTicket, kytheuri.ToString(entry.Target))
		prevSource = entry.Source
		return nil
	})
	if scanErr != nil {
		return scanErr
	}

	// Store the trailing set, if the scan produced any entries at all.
	if current == nil {
		return nil
	}
	return flush()
}
Esempio n. 12
0
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
//
// A nil opts is replaced by a zero-valued Options.  The work happens in two
// phases: first the GraphStore is scanned and its node facts and forward edges
// are handed to combineNodesAndEdges, which yields a sorted set of edges; then
// that set is read back and every edge is sent to both the paged-edge writer
// and the decoration/reference writer.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")

	out := &servingOutput{
		xs:  table.ProtoBatchParallel{&table.KVProto{DB: db}},
		idx: &table.KVInverted{DB: db},
	}
	entries := make(chan *spb.Entry, chBuf)

	var cErr error
	var wg sync.WaitGroup
	var sortedEdges disksort.Interface
	wg.Add(1)
	// Phase 1 consumer: sortedEdges and cErr are written here and only read
	// after wg.Wait() below.
	go func() {
		sortedEdges, cErr = combineNodesAndEdges(ctx, opts, out, entries)
		if cErr != nil {
			cErr = fmt.Errorf("error combining nodes and edges: %v", cErr)
		}
		wg.Done()
	}()

	// Phase 1 producer: the scan runs on the calling goroutine.  Entries that
	// are neither node facts nor forward edges are dropped.
	err := gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
		if graphstore.IsNodeFact(e) || schema.EdgeDirection(e.EdgeKind) == schema.Forward {
			entries <- e
		}
		return nil
	})
	close(entries)
	// NOTE(review): on a scan error Run returns without waiting for the
	// combine goroutine started above.
	if err != nil {
		return fmt.Errorf("error scanning GraphStore: %v", err)
	}

	wg.Wait()
	if cErr != nil {
		return cErr
	}

	pesIn, dIn := make(chan *srvpb.Edge, chBuf), make(chan *srvpb.Edge, chBuf)
	var pErr, fErr error
	// The same WaitGroup is reused for the two phase-2 writers.
	wg.Add(2)
	go func() {
		defer wg.Done()
		if err := writePagedEdges(ctx, pesIn, out.xs, opts); err != nil {
			pErr = fmt.Errorf("error writing paged edge sets: %v", err)
		}
	}()
	go func() {
		defer wg.Done()
		if err := writeDecorAndRefs(ctx, opts, dIn, out); err != nil {
			fErr = fmt.Errorf("error writing file decorations: %v", err)
		}
	}()

	// Phase 2 producer: each edge is sent to both writers.  The unchecked
	// type assertion panics if Read yields anything other than *srvpb.Edge.
	err = sortedEdges.Read(func(x interface{}) error {
		e := x.(*srvpb.Edge)
		pesIn <- e
		dIn <- e
		return nil
	})
	close(pesIn)
	close(dIn)
	// NOTE(review): on a read error Run returns without waiting for the two
	// writer goroutines.
	if err != nil {
		return fmt.Errorf("error reading edges table: %v", err)
	}

	wg.Wait()
	// A paged-edge error takes precedence over a decoration error.
	if pErr != nil {
		return pErr
	}
	return fErr
}
Esempio n. 13
0
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
//
// A nil opts is replaced by a zero-valued Options.  The GraphStore is scanned
// once in a background goroutine and each entry is routed over channels
// buffered to chBuf to the node, edge, decoration, and file-tree writers,
// which all run concurrently.  The first non-nil error is returned, checked in
// this order: edges, nodes, decorations, file tree, search index, scan.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")
	tbl := table.ProtoBatchParallel{&table.KVProto{db}}

	entries := make(chan *spb.Entry, chBuf)

	ftIn := make(chan *spb.VName, chBuf)
	nIn, eIn, dIn := make(chan *spb.Entry, chBuf), make(chan *spb.Entry, chBuf), make(chan *spb.Entry, chBuf)

	// Router: node facts go to the node writer (and, for node-kind facts with
	// value "file", their source goes to the file-tree writer); all other
	// entries go to the edge writer.  Every entry, of either sort, is also sent
	// to the decoration writer.  The downstream channels are closed once
	// entries has been drained.
	go func() {
		for entry := range entries {
			if graphstore.IsNodeFact(entry) {
				nIn <- entry
				if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == "file" {
					ftIn <- entry.Source
				}
			} else {
				eIn <- entry
			}
			dIn <- entry
		}
		close(ftIn)
		close(nIn)
		close(eIn)
		close(dIn)
	}()
	log.Println("Scanning GraphStore")
	var sErr error
	// Producer: feeds every scanned entry to the router.  sErr is assigned
	// before entries is closed.
	go func() {
		sErr = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
			entries <- e
			return nil
		})
		if sErr != nil {
			sErr = fmt.Errorf("error scanning GraphStore: %v", sErr)
		}
		close(entries)
	}()

	var (
		ftErr, nErr, eErr, dErr error
		ftWG, xrefsWG           sync.WaitGroup
	)
	ftWG.Add(1)
	go func() {
		defer ftWG.Done()
		ftErr = writeFileTree(ctx, tbl, ftIn)
		if ftErr != nil {
			ftErr = fmt.Errorf("error writing FileTree: %v", ftErr)
		} else {
			log.Println("Wrote FileTree")
		}
	}()
	xrefsWG.Add(3)
	// nodes connects writeNodes to writeIndex below; unlike the other
	// channels it is unbuffered.
	// NOTE(review): presumably writeNodes sends on and closes this channel and
	// writeIndex consumes it -- confirm against those helpers.
	nodes := make(chan *srvpb.Node)
	go func() {
		defer xrefsWG.Done()
		nErr = writeNodes(ctx, tbl, nIn, nodes)
		if nErr != nil {
			nErr = fmt.Errorf("error writing Nodes: %v", nErr)
		} else {
			log.Println("Wrote Nodes")
		}
	}()
	go func() {
		defer xrefsWG.Done()
		eErr = writeEdges(ctx, tbl, eIn, opts.MaxEdgePageSize)
		if eErr != nil {
			eErr = fmt.Errorf("error writing Edges: %v", eErr)
		} else {
			log.Println("Wrote Edges")
		}
	}()
	go func() {
		defer xrefsWG.Done()
		dErr = writeDecorations(ctx, tbl, dIn)
		if dErr != nil {
			dErr = fmt.Errorf("error writing FileDecorations: %v", dErr)
		} else {
			log.Println("Wrote Decorations")
		}
	}()

	var (
		idxWG  sync.WaitGroup
		idxErr error
	)
	idxWG.Add(1)
	go func() {
		defer idxWG.Done()
		idxErr = writeIndex(ctx, &table.KVInverted{db}, nodes)
		if idxErr != nil {
			idxErr = fmt.Errorf("error writing Search Index: %v", idxErr)
		} else {
			log.Println("Wrote Search Index")
		}
	}()

	// NOTE(review): the early returns below do not wait on ftWG or idxWG, so
	// those goroutines may still be running when Run returns.
	xrefsWG.Wait()
	if eErr != nil {
		return eErr
	} else if nErr != nil {
		return nErr
	} else if dErr != nil {
		return dErr
	}

	ftWG.Wait()
	if ftErr != nil {
		return ftErr
	}
	idxWG.Wait()
	if idxErr != nil {
		return idxErr
	}

	// A scan error is only reported when every other stage succeeded.
	return sErr
}
Esempio n. 14
0
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
//
// A nil opts is replaced by a zero-valued Options.  The work happens in two
// phases: first the GraphStore is scanned and every entry is handed to
// combineNodesAndEdges, which has access to a temporary "complete.edges" table
// through out; then readCompletedEdges reads that table and feeds the
// paged-edge and file-decoration writers.  The temporary table is closed when
// Run returns.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")

	edges, err := tempTable("complete.edges")
	if err != nil {
		return err
	}
	// A failure to close the temporary table is logged, not returned.
	defer func() {
		if err := edges.Close(); err != nil {
			log.Printf("Error closing edges table: %v", err)
		}
	}()

	out := &servingOutput{
		xs:  table.ProtoBatchParallel{&table.KVProto{db}},
		idx: &table.KVInverted{db},

		completeEdges: &table.KVProto{edges},
	}
	entries := make(chan *spb.Entry, chBuf)

	var cErr error
	var wg sync.WaitGroup
	wg.Add(1)
	// Phase 1 consumer: cErr is written here and only read after wg.Wait().
	go func() {
		cErr = combineNodesAndEdges(ctx, out, entries)
		if cErr != nil {
			cErr = fmt.Errorf("error combining nodes and edges: %v", cErr)
		}
		wg.Done()
	}()

	// Phase 1 producer: the scan runs on the calling goroutine and forwards
	// every entry unfiltered.
	err = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
		entries <- e
		return nil
	})
	close(entries)
	// NOTE(review): on a scan error Run returns without waiting for the
	// combine goroutine started above.
	if err != nil {
		return fmt.Errorf("error scanning GraphStore: %v", err)
	}

	wg.Wait()
	if cErr != nil {
		return cErr
	}

	pesIn, dIn := make(chan *srvpb.Edge, chBuf), make(chan *srvpb.Edge, chBuf)
	var pErr, fErr error
	// The same WaitGroup is reused for the two phase-2 writers.
	wg.Add(2)
	go func() {
		defer wg.Done()
		if err := writePagedEdges(ctx, pesIn, out.xs, opts.MaxEdgePageSize); err != nil {
			pErr = fmt.Errorf("error writing paged edge sets: %v", err)
		}
	}()
	go func() {
		defer wg.Done()
		if err := writeFileDecorations(ctx, dIn, out); err != nil {
			fErr = fmt.Errorf("error writing file decorations: %v", err)
		}
	}()

	// Run itself never closes pesIn or dIn.
	// NOTE(review): presumably readCompletedEdges closes both channels when it
	// finishes; otherwise the writers above would never see end-of-input --
	// confirm.  On error Run returns here without waiting for the writers.
	if err := readCompletedEdges(ctx, out.completeEdges, pesIn, dIn); err != nil {
		return fmt.Errorf("error reading edges table: %v", err)
	}

	wg.Wait()
	// A paged-edge error takes precedence over a decoration error.
	if pErr != nil {
		return pErr
	}
	return fErr
}
Esempio n. 15
0
// LogClose closes gs.  A close failure is reported through the standard
// logger rather than returned, which makes the function convenient to defer.
func LogClose(ctx context.Context, gs graphstore.Service) {
	err := gs.Close(ctx)
	if err == nil {
		return
	}
	log.Printf("GraphStore failed to close: %v", err)
}