コード例 #1
0
ファイル: pipeline.go プロジェクト: benjyw/kythe
func filterReverses(rd stream.EntryReader) stream.EntryReader {
	return func(f func(*spb.Entry) error) error {
		return rd(func(e *spb.Entry) error {
			if graphstore.IsNodeFact(e) || edges.IsForward(e.EdgeKind) {
				return f(e)
			}
			return nil
		})
	}
}
コード例 #2
0
ファイル: pipeline.go プロジェクト: bowlofstew/kythe
func filterReverses(rd stream.EntryReader) stream.EntryReader {
	return func(f func(*spb.Entry) error) error {
		return rd(func(e *spb.Entry) error {
			if graphstore.IsNodeFact(e) || schema.EdgeDirection(e.EdgeKind) == schema.Forward {
				return f(e)
			}
			return nil
		})
	}
}
コード例 #3
0
ファイル: sql.go プロジェクト: herberteuler/kythe
func scanEntries(rows *sql.Rows, f graphstore.EntryFunc) error {
	for rows.Next() {
		entry := &spb.Entry{
			Source: &spb.VName{},
			Target: &spb.VName{},
		}
		err := rows.Scan(
			&entry.Source.Signature,
			&entry.Source.Corpus,
			&entry.Source.Root,
			&entry.Source.Path,
			&entry.Source.Language,
			&entry.EdgeKind,
			&entry.FactName,
			&entry.Target.Signature,
			&entry.Target.Corpus,
			&entry.Target.Root,
			&entry.Target.Path,
			&entry.Target.Language,
			&entry.FactValue)
		if err != nil {
			rows.Close() // ignore errors
			return err
		}
		if graphstore.IsNodeFact(entry) {
			entry.Target = nil
		}
		if err := f(entry); err == io.EOF {
			rows.Close()
			return nil
		} else if err != nil {
			rows.Close()
			return err
		}
	}
	return rows.Close()
}
コード例 #4
0
ファイル: pq_build.go プロジェクト: benjyw/kythe
func (d *DB) copyEntries(entries <-chan *spb.Entry) error {
	// Start a transaction for a COPY statement per table
	nodesTx, err := d.Begin()
	if err != nil {
		return err
	}
	edgesTx, err := d.Begin()
	if err != nil {
		return err
	}

	// Create each table in their corresponding transactions to speed up COPY
	if _, err := nodesTx.Exec(createNodesTable); err != nil {
		return fmt.Errorf("error truncating Nodes table: %v", err)
	} else if _, err := edgesTx.Exec(createEdgeTable); err != nil {
		return fmt.Errorf("error truncating Edges table: %v", err)
	}

	copyNode, err := nodesTx.Prepare(pq.CopyIn(
		"nodes",
		"ticket",
		"node_kind",
		"subkind",
		"text",
		"text_encoding",
		"start_offset",
		"end_offset",
		"snippet_start",
		"snippet_end",
		"other_facts_num",
		"other_facts",
	))
	if err != nil {
		return fmt.Errorf("error preparing Nodes copy: %v", err)
	}

	copyEdge, err := edgesTx.Prepare(pq.CopyIn(
		"edges",
		"source",
		"kind",
		"target",
		"ordinal",
	))
	if err != nil {
		return fmt.Errorf("error preparing Edges copy: %v", err)
	}

	var node srvpb.Node
	var nodeKind string
	var subkind, textEncoding *string
	var text *[]byte
	var startOffset, endOffset, snippetStart, snippetEnd *int64
	for e := range entries {
		if graphstore.IsNodeFact(e) {
			ticket := kytheuri.ToString(e.Source)
			if node.Ticket != "" && node.Ticket != ticket {
				nodeTicket := node.Ticket
				node.Ticket = ""
				var rec []byte
				if len(node.Fact) > 0 {
					rec, err = proto.Marshal(&node)
					if err != nil {
						return fmt.Errorf("error marshaling facts: %v", err)
					}
				}
				if text != nil && textEncoding == nil {
					textEncoding = proto.String(facts.DefaultTextEncoding)
				}
				if _, err := copyNode.Exec(
					nodeTicket,
					nodeKind,
					subkind,
					text,
					textEncoding,
					startOffset,
					endOffset,
					snippetStart,
					snippetEnd,
					len(node.Fact),
					rec,
				); err != nil {
					return fmt.Errorf("error copying node: %v", err)
				}
				node.Fact, text = node.Fact[0:0], nil
				nodeKind = ""
				subkind, textEncoding = nil, nil
				startOffset, endOffset, snippetStart, snippetEnd = nil, nil, nil, nil
			}
			if node.Ticket == "" {
				node.Ticket = ticket
			}
			switch e.FactName {
			case facts.NodeKind:
				nodeKind = string(e.FactValue)
			case facts.Subkind:
				subkind = proto.String(string(e.FactValue))
			case facts.Text:
				text = &e.FactValue
			case facts.TextEncoding:
				textEncoding = proto.String(string(e.FactValue))
			case facts.AnchorStart:
				n, err := strconv.ParseInt(string(e.FactValue), 10, 64)
				if err == nil {
					startOffset = proto.Int64(n)
				}
			case facts.AnchorEnd:
				n, err := strconv.ParseInt(string(e.FactValue), 10, 64)
				if err == nil {
					endOffset = proto.Int64(n)
				}
			case facts.SnippetStart:
				n, err := strconv.ParseInt(string(e.FactValue), 10, 64)
				if err == nil {
					snippetStart = proto.Int64(n)
				}
			case facts.SnippetEnd:
				n, err := strconv.ParseInt(string(e.FactValue), 10, 64)
				if err == nil {
					snippetEnd = proto.Int64(n)
				}
			default:
				node.Fact = append(node.Fact, &cpb.Fact{
					Name:  e.FactName,
					Value: e.FactValue,
				})
			}
		} else if edges.IsForward(e.EdgeKind) {
			kind, ordinal, _ := edges.ParseOrdinal(e.EdgeKind)
			ticket := kytheuri.ToString(e.Source)
			if _, err := copyEdge.Exec(ticket, kind, kytheuri.ToString(e.Target), ordinal); err != nil {
				return fmt.Errorf("error copying edge: %v", err)
			}
		}
	}

	if _, err := copyNode.Exec(); err != nil {
		return fmt.Errorf("error flushing nodes: %v", err)
	} else if _, err := copyEdge.Exec(); err != nil {
		return fmt.Errorf("error flushing edges: %v", err)
	}

	if err := nodesTx.Commit(); err != nil {
		return fmt.Errorf("error committing Nodes transaction: %v", err)
	} else if err := edgesTx.Commit(); err != nil {
		return fmt.Errorf("error committing Edges transaction: %v", err)
	}

	return nil
}
コード例 #5
0
ファイル: pipeline.go プロジェクト: jwatt/kythe
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")

	out := &servingOutput{
		xs:  table.ProtoBatchParallel{&table.KVProto{DB: db}},
		idx: &table.KVInverted{DB: db},
	}
	entries := make(chan *spb.Entry, chBuf)

	var cErr error
	var wg sync.WaitGroup
	var sortedEdges disksort.Interface
	wg.Add(1)
	go func() {
		sortedEdges, cErr = combineNodesAndEdges(ctx, opts, out, entries)
		if cErr != nil {
			cErr = fmt.Errorf("error combining nodes and edges: %v", cErr)
		}
		wg.Done()
	}()

	err := gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
		if graphstore.IsNodeFact(e) || schema.EdgeDirection(e.EdgeKind) == schema.Forward {
			entries <- e
		}
		return nil
	})
	close(entries)
	if err != nil {
		return fmt.Errorf("error scanning GraphStore: %v", err)
	}

	wg.Wait()
	if cErr != nil {
		return cErr
	}

	pesIn, dIn := make(chan *srvpb.Edge, chBuf), make(chan *srvpb.Edge, chBuf)
	var pErr, fErr error
	wg.Add(2)
	go func() {
		defer wg.Done()
		if err := writePagedEdges(ctx, pesIn, out.xs, opts); err != nil {
			pErr = fmt.Errorf("error writing paged edge sets: %v", err)
		}
	}()
	go func() {
		defer wg.Done()
		if err := writeDecorAndRefs(ctx, opts, dIn, out); err != nil {
			fErr = fmt.Errorf("error writing file decorations: %v", err)
		}
	}()

	err = sortedEdges.Read(func(x interface{}) error {
		e := x.(*srvpb.Edge)
		pesIn <- e
		dIn <- e
		return nil
	})
	close(pesIn)
	close(dIn)
	if err != nil {
		return fmt.Errorf("error reading edges table: %v", err)
	}

	wg.Wait()
	if pErr != nil {
		return pErr
	}
	return fErr
}
コード例 #6
0
ファイル: pipeline.go プロジェクト: kidaa/kythe
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")
	tbl := table.ProtoBatchParallel{&table.KVProto{db}}

	entries := make(chan *spb.Entry, chBuf)

	ftIn := make(chan *spb.VName, chBuf)
	nIn, eIn, dIn := make(chan *spb.Entry, chBuf), make(chan *spb.Entry, chBuf), make(chan *spb.Entry, chBuf)

	go func() {
		for entry := range entries {
			if graphstore.IsNodeFact(entry) {
				nIn <- entry
				if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == "file" {
					ftIn <- entry.Source
				}
			} else {
				eIn <- entry
			}
			dIn <- entry
		}
		close(ftIn)
		close(nIn)
		close(eIn)
		close(dIn)
	}()
	log.Println("Scanning GraphStore")
	var sErr error
	go func() {
		sErr = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
			entries <- e
			return nil
		})
		if sErr != nil {
			sErr = fmt.Errorf("error scanning GraphStore: %v", sErr)
		}
		close(entries)
	}()

	var (
		ftErr, nErr, eErr, dErr error
		ftWG, xrefsWG           sync.WaitGroup
	)
	ftWG.Add(1)
	go func() {
		defer ftWG.Done()
		ftErr = writeFileTree(ctx, tbl, ftIn)
		if ftErr != nil {
			ftErr = fmt.Errorf("error writing FileTree: %v", ftErr)
		} else {
			log.Println("Wrote FileTree")
		}
	}()
	xrefsWG.Add(3)
	nodes := make(chan *srvpb.Node)
	go func() {
		defer xrefsWG.Done()
		nErr = writeNodes(ctx, tbl, nIn, nodes)
		if nErr != nil {
			nErr = fmt.Errorf("error writing Nodes: %v", nErr)
		} else {
			log.Println("Wrote Nodes")
		}
	}()
	go func() {
		defer xrefsWG.Done()
		eErr = writeEdges(ctx, tbl, eIn, opts.MaxEdgePageSize)
		if eErr != nil {
			eErr = fmt.Errorf("error writing Edges: %v", eErr)
		} else {
			log.Println("Wrote Edges")
		}
	}()
	go func() {
		defer xrefsWG.Done()
		dErr = writeDecorations(ctx, tbl, dIn)
		if dErr != nil {
			dErr = fmt.Errorf("error writing FileDecorations: %v", dErr)
		} else {
			log.Println("Wrote Decorations")
		}
	}()

	var (
		idxWG  sync.WaitGroup
		idxErr error
	)
	idxWG.Add(1)
	go func() {
		defer idxWG.Done()
		idxErr = writeIndex(ctx, &table.KVInverted{db}, nodes)
		if idxErr != nil {
			idxErr = fmt.Errorf("error writing Search Index: %v", idxErr)
		} else {
			log.Println("Wrote Search Index")
		}
	}()

	xrefsWG.Wait()
	if eErr != nil {
		return eErr
	} else if nErr != nil {
		return nErr
	} else if dErr != nil {
		return dErr
	}

	ftWG.Wait()
	if ftErr != nil {
		return ftErr
	}
	idxWG.Wait()
	if idxErr != nil {
		return idxErr
	}

	return sErr
}
コード例 #7
0
ファイル: pipeline.go プロジェクト: herberteuler/kythe
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB) error {
	log.Println("Starting serving pipeline")
	tbl := &table.KVProto{db}

	// TODO(schroederc): for large corpora, this won't fit in memory
	var files []string

	entries := make(chan *spb.Entry)
	ftIn, nIn, eIn := make(chan *spb.VName), make(chan *spb.Entry), make(chan *spb.Entry)
	go func() {
		for entry := range entries {
			if graphstore.IsNodeFact(entry) {
				nIn <- entry
				if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == "file" {
					ftIn <- entry.Source
					files = append(files, kytheuri.ToString(entry.Source))
				}
			} else {
				eIn <- entry
			}
		}
		close(ftIn)
		close(nIn)
		close(eIn)
	}()
	log.Println("Scanning GraphStore")
	var sErr error
	go func() {
		sErr = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
			entries <- e
			return nil
		})
		if sErr != nil {
			sErr = fmt.Errorf("error scanning GraphStore: %v", sErr)
		}
		close(entries)
	}()

	var (
		ftErr, nErr, eErr error
		ftWG, edgeNodeWG  sync.WaitGroup
	)
	ftWG.Add(1)
	go func() {
		defer ftWG.Done()
		ftErr = writeFileTree(ctx, tbl, ftIn)
		if ftErr != nil {
			ftErr = fmt.Errorf("error writing FileTree: %v", ftErr)
		} else {
			log.Println("Wrote FileTree")
		}
	}()
	edgeNodeWG.Add(2)
	nodes := make(chan *srvpb.Node)
	go func() {
		defer edgeNodeWG.Done()
		nErr = writeNodes(ctx, tbl, nIn, nodes)
		if nErr != nil {
			nErr = fmt.Errorf("error writing Nodes: %v", nErr)
		} else {
			log.Println("Wrote Nodes")
		}
	}()
	go func() {
		defer edgeNodeWG.Done()
		eErr = writeEdges(ctx, tbl, eIn)
		if eErr != nil {
			eErr = fmt.Errorf("error writing Edges: %v", eErr)
		} else {
			log.Println("Wrote Edges")
		}
	}()

	var (
		idxWG  sync.WaitGroup
		idxErr error
	)
	idxWG.Add(1)
	go func() {
		defer idxWG.Done()
		idxErr = writeIndex(ctx, &table.KVInverted{db}, nodes)
		if idxErr != nil {
			idxErr = fmt.Errorf("error writing Search Index: %v", idxErr)
		} else {
			log.Println("Wrote Search Index")
		}
	}()

	edgeNodeWG.Wait()
	if eErr != nil {
		return eErr
	} else if nErr != nil {
		return nErr
	}

	es := xrefs.NodesEdgesService(xsrv.NewCombinedTable(tbl))
	if err := writeDecorations(ctx, tbl, es, files); err != nil {
		return fmt.Errorf("error writing FileDecorations: %v", err)
	}

	ftWG.Wait()
	if ftErr != nil {
		return ftErr
	}
	idxWG.Wait()
	if idxErr != nil {
		return idxErr
	}

	return sErr
}
コード例 #8
0
ファイル: pipeline.go プロジェクト: herberteuler/kythe
func writeEdgePages(ctx context.Context, t table.Proto, gs graphstore.Service) error {
	// TODO(schroederc): spill large PagedEdgeSets into EdgePages
	log.Println("Writing EdgeSets")
	var (
		lastSrc  *spb.VName
		pes      *srvpb.PagedEdgeSet
		grp      *srvpb.EdgeSet_Group
		pesTotal int
	)
	if err := gs.Scan(ctx, new(spb.ScanRequest), func(e *spb.Entry) error {
		if graphstore.IsNodeFact(e) {
			panic("non-edge entry")
		}

		if pes != nil && !compare.VNamesEqual(lastSrc, e.Source) {
			if grp != nil {
				pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
				pesTotal += len(grp.TargetTicket)
			}
			pes.TotalEdges = int32(pesTotal)
			if err := t.Put(ctx, xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil {
				return err
			}
			pes = nil
			grp = nil
			pesTotal = 0
		}
		if pes == nil {
			pes = &srvpb.PagedEdgeSet{
				EdgeSet: &srvpb.EdgeSet{
					SourceTicket: kytheuri.ToString(e.Source),
				},
			}
		}

		if grp != nil && grp.Kind != e.EdgeKind {
			pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
			pesTotal += len(grp.TargetTicket)
			grp = nil
		}
		if grp == nil {
			grp = &srvpb.EdgeSet_Group{
				Kind: e.EdgeKind,
			}
		}

		grp.TargetTicket = append(grp.TargetTicket, kytheuri.ToString(e.Target))
		lastSrc = e.Source
		return nil
	}); err != nil {
		return fmt.Errorf("error scanning reverse edges table: %v", err)
	}
	if pes != nil {
		if grp != nil {
			pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
			pesTotal += len(grp.TargetTicket)
		}
		pes.TotalEdges = int32(pesTotal)
		if err := t.Put(ctx, xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil {
			return err
		}
	}
	return nil
}