// filterReverses wraps rd in a stream.EntryReader that yields only node facts
// and forward edges, dropping all reverse edges.
func filterReverses(rd stream.EntryReader) stream.EntryReader {
	return func(f func(*spb.Entry) error) error {
		return rd(func(e *spb.Entry) error {
			if graphstore.IsNodeFact(e) || edges.IsForward(e.EdgeKind) {
				return f(e)
			}
			return nil
		})
	}
}
// filterReverses wraps rd in a stream.EntryReader that yields only node facts
// and forward edges, dropping all reverse edges.
func filterReverses(rd stream.EntryReader) stream.EntryReader {
	return func(f func(*spb.Entry) error) error {
		return rd(func(e *spb.Entry) error {
			if graphstore.IsNodeFact(e) || schema.EdgeDirection(e.EdgeKind) == schema.Forward {
				return f(e)
			}
			return nil
		})
	}
}
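// A minimal usage sketch for filterReverses, assuming only what the signatures
// above show: stream.EntryReader's underlying type is
// func(func(*spb.Entry) error) error. The hand-rolled in-memory reader and the
// log line below are illustrative, not part of the pipeline.
func exampleFilterReverses(all []*spb.Entry) error {
	// Reader that simply replays a slice of entries.
	var rd stream.EntryReader = func(f func(*spb.Entry) error) error {
		for _, e := range all {
			if err := f(e); err != nil {
				return err
			}
		}
		return nil
	}

	// Only node facts and forward edges reach this callback; reverse edges are skipped.
	return filterReverses(rd)(func(e *spb.Entry) error {
		log.Printf("kept entry with fact %q", e.FactName)
		return nil
	})
}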
// scanEntries decodes each row in rows into an Entry and passes it to f,
// stopping early (without error) if f returns io.EOF. rows is closed before
// returning.
func scanEntries(rows *sql.Rows, f graphstore.EntryFunc) error {
	for rows.Next() {
		entry := &spb.Entry{
			Source: &spb.VName{},
			Target: &spb.VName{},
		}
		err := rows.Scan(
			&entry.Source.Signature,
			&entry.Source.Corpus,
			&entry.Source.Root,
			&entry.Source.Path,
			&entry.Source.Language,
			&entry.EdgeKind,
			&entry.FactName,
			&entry.Target.Signature,
			&entry.Target.Corpus,
			&entry.Target.Root,
			&entry.Target.Path,
			&entry.Target.Language,
			&entry.FactValue)
		if err != nil {
			rows.Close() // ignore errors
			return err
		}
		if graphstore.IsNodeFact(entry) {
			entry.Target = nil
		}
		if err := f(entry); err == io.EOF {
			rows.Close()
			return nil
		} else if err != nil {
			rows.Close()
			return err
		}
	}
	return rows.Close()
}
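// Illustrative driver for scanEntries, a sketch only: the query text and its
// column order are hypothetical stand-ins for a SELECT returning the thirteen
// columns scanEntries expects (source VName fields, edge kind, fact name,
// target VName fields, fact value). It simply counts the decoded entries.
func countEntries(db *sql.DB, query string) (int, error) {
	rows, err := db.Query(query)
	if err != nil {
		return 0, err
	}
	var n int
	// scanEntries closes rows on every return path.
	err = scanEntries(rows, func(e *spb.Entry) error {
		n++
		return nil
	})
	return n, err
}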
// copyEntries streams the given entries into the Nodes and Edges tables using
// a COPY statement per table, coalescing consecutive node facts that share a
// ticket into a single node row.
func (d *DB) copyEntries(entries <-chan *spb.Entry) error {
	// Start a transaction for a COPY statement per table
	nodesTx, err := d.Begin()
	if err != nil {
		return err
	}
	edgesTx, err := d.Begin()
	if err != nil {
		return err
	}

	// Create each table in its corresponding transaction to speed up COPY
	if _, err := nodesTx.Exec(createNodesTable); err != nil {
		return fmt.Errorf("error truncating Nodes table: %v", err)
	} else if _, err := edgesTx.Exec(createEdgeTable); err != nil {
		return fmt.Errorf("error truncating Edges table: %v", err)
	}

	copyNode, err := nodesTx.Prepare(pq.CopyIn(
		"nodes",
		"ticket",
		"node_kind",
		"subkind",
		"text",
		"text_encoding",
		"start_offset",
		"end_offset",
		"snippet_start",
		"snippet_end",
		"other_facts_num",
		"other_facts",
	))
	if err != nil {
		return fmt.Errorf("error preparing Nodes copy: %v", err)
	}

	copyEdge, err := edgesTx.Prepare(pq.CopyIn(
		"edges",
		"source",
		"kind",
		"target",
		"ordinal",
	))
	if err != nil {
		return fmt.Errorf("error preparing Edges copy: %v", err)
	}

	var node srvpb.Node
	var nodeKind string
	var subkind, textEncoding *string
	var text *[]byte
	var startOffset, endOffset, snippetStart, snippetEnd *int64
	for e := range entries {
		if graphstore.IsNodeFact(e) {
			ticket := kytheuri.ToString(e.Source)
			// A new ticket means the previous node's facts are complete; write its row.
			if node.Ticket != "" && node.Ticket != ticket {
				nodeTicket := node.Ticket
				node.Ticket = ""
				var rec []byte
				if len(node.Fact) > 0 {
					rec, err = proto.Marshal(&node)
					if err != nil {
						return fmt.Errorf("error marshaling facts: %v", err)
					}
				}

				if text != nil && textEncoding == nil {
					textEncoding = proto.String(facts.DefaultTextEncoding)
				}

				if _, err := copyNode.Exec(
					nodeTicket,
					nodeKind,
					subkind,
					text,
					textEncoding,
					startOffset,
					endOffset,
					snippetStart,
					snippetEnd,
					len(node.Fact),
					rec,
				); err != nil {
					return fmt.Errorf("error copying node: %v", err)
				}

				// Reset the accumulators for the next node.
				node.Fact, text = node.Fact[0:0], nil
				nodeKind = ""
				subkind, textEncoding = nil, nil
				startOffset, endOffset, snippetStart, snippetEnd = nil, nil, nil, nil
			}
			if node.Ticket == "" {
				node.Ticket = ticket
			}
			switch e.FactName {
			case facts.NodeKind:
				nodeKind = string(e.FactValue)
			case facts.Subkind:
				subkind = proto.String(string(e.FactValue))
			case facts.Text:
				text = &e.FactValue
			case facts.TextEncoding:
				textEncoding = proto.String(string(e.FactValue))
			case facts.AnchorStart:
				n, err := strconv.ParseInt(string(e.FactValue), 10, 64)
				if err == nil {
					startOffset = proto.Int64(n)
				}
			case facts.AnchorEnd:
				n, err := strconv.ParseInt(string(e.FactValue), 10, 64)
				if err == nil {
					endOffset = proto.Int64(n)
				}
			case facts.SnippetStart:
				n, err := strconv.ParseInt(string(e.FactValue), 10, 64)
				if err == nil {
					snippetStart = proto.Int64(n)
				}
			case facts.SnippetEnd:
				n, err := strconv.ParseInt(string(e.FactValue), 10, 64)
				if err == nil {
					snippetEnd = proto.Int64(n)
				}
			default:
				node.Fact = append(node.Fact, &cpb.Fact{
					Name:  e.FactName,
					Value: e.FactValue,
				})
			}
		} else if edges.IsForward(e.EdgeKind) {
			kind, ordinal, _ := edges.ParseOrdinal(e.EdgeKind)
			ticket := kytheuri.ToString(e.Source)
			if _, err := copyEdge.Exec(ticket, kind, kytheuri.ToString(e.Target), ordinal); err != nil {
				return fmt.Errorf("error copying edge: %v", err)
			}
		}
	}

	// Flush any remaining buffered COPY data.
	if _, err := copyNode.Exec(); err != nil {
		return fmt.Errorf("error flushing nodes: %v", err)
	} else if _, err := copyEdge.Exec(); err != nil {
		return fmt.Errorf("error flushing edges: %v", err)
	}

	if err := nodesTx.Commit(); err != nil {
		return fmt.Errorf("error committing Nodes transaction: %v", err)
	} else if err := edgesTx.Commit(); err != nil {
		return fmt.Errorf("error committing Edges transaction: %v", err)
	}

	return nil
}
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")

	out := &servingOutput{
		xs:  table.ProtoBatchParallel{&table.KVProto{DB: db}},
		idx: &table.KVInverted{DB: db},
	}
	entries := make(chan *spb.Entry, chBuf)

	var cErr error
	var wg sync.WaitGroup
	var sortedEdges disksort.Interface
	wg.Add(1)
	go func() {
		sortedEdges, cErr = combineNodesAndEdges(ctx, opts, out, entries)
		if cErr != nil {
			cErr = fmt.Errorf("error combining nodes and edges: %v", cErr)
		}
		wg.Done()
	}()

	err := gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
		if graphstore.IsNodeFact(e) || schema.EdgeDirection(e.EdgeKind) == schema.Forward {
			entries <- e
		}
		return nil
	})
	close(entries)
	if err != nil {
		return fmt.Errorf("error scanning GraphStore: %v", err)
	}

	wg.Wait()
	if cErr != nil {
		return cErr
	}

	pesIn, dIn := make(chan *srvpb.Edge, chBuf), make(chan *srvpb.Edge, chBuf)
	var pErr, fErr error
	wg.Add(2)
	go func() {
		defer wg.Done()
		if err := writePagedEdges(ctx, pesIn, out.xs, opts); err != nil {
			pErr = fmt.Errorf("error writing paged edge sets: %v", err)
		}
	}()
	go func() {
		defer wg.Done()
		if err := writeDecorAndRefs(ctx, opts, dIn, out); err != nil {
			fErr = fmt.Errorf("error writing file decorations: %v", err)
		}
	}()

	err = sortedEdges.Read(func(x interface{}) error {
		e := x.(*srvpb.Edge)
		pesIn <- e
		dIn <- e
		return nil
	})
	close(pesIn)
	close(dIn)
	if err != nil {
		return fmt.Errorf("error reading edges table: %v", err)
	}

	wg.Wait()
	if pErr != nil {
		return pErr
	}
	return fErr
}
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error {
	if opts == nil {
		opts = new(Options)
	}

	log.Println("Starting serving pipeline")

	tbl := table.ProtoBatchParallel{&table.KVProto{db}}

	entries := make(chan *spb.Entry, chBuf)

	ftIn := make(chan *spb.VName, chBuf)
	nIn, eIn, dIn := make(chan *spb.Entry, chBuf), make(chan *spb.Entry, chBuf), make(chan *spb.Entry, chBuf)

	go func() {
		for entry := range entries {
			if graphstore.IsNodeFact(entry) {
				nIn <- entry
				if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == "file" {
					ftIn <- entry.Source
				}
			} else {
				eIn <- entry
			}
			dIn <- entry
		}
		close(ftIn)
		close(nIn)
		close(eIn)
		close(dIn)
	}()

	log.Println("Scanning GraphStore")
	var sErr error
	go func() {
		sErr = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
			entries <- e
			return nil
		})
		if sErr != nil {
			sErr = fmt.Errorf("error scanning GraphStore: %v", sErr)
		}
		close(entries)
	}()

	var (
		ftErr, nErr, eErr, dErr error

		ftWG, xrefsWG sync.WaitGroup
	)
	ftWG.Add(1)
	go func() {
		defer ftWG.Done()
		ftErr = writeFileTree(ctx, tbl, ftIn)
		if ftErr != nil {
			ftErr = fmt.Errorf("error writing FileTree: %v", ftErr)
		} else {
			log.Println("Wrote FileTree")
		}
	}()
	xrefsWG.Add(3)
	nodes := make(chan *srvpb.Node)
	go func() {
		defer xrefsWG.Done()
		nErr = writeNodes(ctx, tbl, nIn, nodes)
		if nErr != nil {
			nErr = fmt.Errorf("error writing Nodes: %v", nErr)
		} else {
			log.Println("Wrote Nodes")
		}
	}()
	go func() {
		defer xrefsWG.Done()
		eErr = writeEdges(ctx, tbl, eIn, opts.MaxEdgePageSize)
		if eErr != nil {
			eErr = fmt.Errorf("error writing Edges: %v", eErr)
		} else {
			log.Println("Wrote Edges")
		}
	}()
	go func() {
		defer xrefsWG.Done()
		dErr = writeDecorations(ctx, tbl, dIn)
		if dErr != nil {
			dErr = fmt.Errorf("error writing FileDecorations: %v", dErr)
		} else {
			log.Println("Wrote Decorations")
		}
	}()

	var (
		idxWG  sync.WaitGroup
		idxErr error
	)
	idxWG.Add(1)
	go func() {
		defer idxWG.Done()
		idxErr = writeIndex(ctx, &table.KVInverted{db}, nodes)
		if idxErr != nil {
			idxErr = fmt.Errorf("error writing Search Index: %v", idxErr)
		} else {
			log.Println("Wrote Search Index")
		}
	}()

	xrefsWG.Wait()
	if eErr != nil {
		return eErr
	} else if nErr != nil {
		return nErr
	} else if dErr != nil {
		return dErr
	}

	ftWG.Wait()
	if ftErr != nil {
		return ftErr
	}
	idxWG.Wait()
	if idxErr != nil {
		return idxErr
	}

	return sErr
}
// Run writes the xrefs and filetree serving tables to db based on the given
// graphstore.Service.
func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB) error {
	log.Println("Starting serving pipeline")
	tbl := &table.KVProto{db}

	// TODO(schroederc): for large corpora, this won't fit in memory
	var files []string

	entries := make(chan *spb.Entry)
	ftIn, nIn, eIn := make(chan *spb.VName), make(chan *spb.Entry), make(chan *spb.Entry)
	go func() {
		for entry := range entries {
			if graphstore.IsNodeFact(entry) {
				nIn <- entry
				if entry.FactName == schema.NodeKindFact && string(entry.FactValue) == "file" {
					ftIn <- entry.Source
					files = append(files, kytheuri.ToString(entry.Source))
				}
			} else {
				eIn <- entry
			}
		}
		close(ftIn)
		close(nIn)
		close(eIn)
	}()

	log.Println("Scanning GraphStore")
	var sErr error
	go func() {
		sErr = gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error {
			entries <- e
			return nil
		})
		if sErr != nil {
			sErr = fmt.Errorf("error scanning GraphStore: %v", sErr)
		}
		close(entries)
	}()

	var (
		ftErr, nErr, eErr error

		ftWG, edgeNodeWG sync.WaitGroup
	)
	ftWG.Add(1)
	go func() {
		defer ftWG.Done()
		ftErr = writeFileTree(ctx, tbl, ftIn)
		if ftErr != nil {
			ftErr = fmt.Errorf("error writing FileTree: %v", ftErr)
		} else {
			log.Println("Wrote FileTree")
		}
	}()
	edgeNodeWG.Add(2)
	nodes := make(chan *srvpb.Node)
	go func() {
		defer edgeNodeWG.Done()
		nErr = writeNodes(ctx, tbl, nIn, nodes)
		if nErr != nil {
			nErr = fmt.Errorf("error writing Nodes: %v", nErr)
		} else {
			log.Println("Wrote Nodes")
		}
	}()
	go func() {
		defer edgeNodeWG.Done()
		eErr = writeEdges(ctx, tbl, eIn)
		if eErr != nil {
			eErr = fmt.Errorf("error writing Edges: %v", eErr)
		} else {
			log.Println("Wrote Edges")
		}
	}()

	var (
		idxWG  sync.WaitGroup
		idxErr error
	)
	idxWG.Add(1)
	go func() {
		defer idxWG.Done()
		idxErr = writeIndex(ctx, &table.KVInverted{db}, nodes)
		if idxErr != nil {
			idxErr = fmt.Errorf("error writing Search Index: %v", idxErr)
		} else {
			log.Println("Wrote Search Index")
		}
	}()

	edgeNodeWG.Wait()
	if eErr != nil {
		return eErr
	} else if nErr != nil {
		return nErr
	}

	es := xrefs.NodesEdgesService(xsrv.NewCombinedTable(tbl))
	if err := writeDecorations(ctx, tbl, es, files); err != nil {
		return fmt.Errorf("error writing FileDecorations: %v", err)
	}

	ftWG.Wait()
	if ftErr != nil {
		return ftErr
	}
	idxWG.Wait()
	if idxErr != nil {
		return idxErr
	}

	return sErr
}
// writeEdgePages scans gs, assumed to contain only edge entries grouped by
// source, and writes a PagedEdgeSet to t for each source node.
func writeEdgePages(ctx context.Context, t table.Proto, gs graphstore.Service) error {
	// TODO(schroederc): spill large PagedEdgeSets into EdgePages
	log.Println("Writing EdgeSets")
	var (
		lastSrc  *spb.VName
		pes      *srvpb.PagedEdgeSet
		grp      *srvpb.EdgeSet_Group
		pesTotal int
	)
	if err := gs.Scan(ctx, new(spb.ScanRequest), func(e *spb.Entry) error {
		if graphstore.IsNodeFact(e) {
			panic("non-edge entry")
		}

		// Flush the finished PagedEdgeSet when the source node changes.
		if pes != nil && !compare.VNamesEqual(lastSrc, e.Source) {
			if grp != nil {
				pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
				pesTotal += len(grp.TargetTicket)
			}
			pes.TotalEdges = int32(pesTotal)

			if err := t.Put(ctx, xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil {
				return err
			}
			pes = nil
			grp = nil
			pesTotal = 0
		}
		if pes == nil {
			pes = &srvpb.PagedEdgeSet{
				EdgeSet: &srvpb.EdgeSet{
					SourceTicket: kytheuri.ToString(e.Source),
				},
			}
		}

		// Close out the current group when the edge kind changes.
		if grp != nil && grp.Kind != e.EdgeKind {
			pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
			pesTotal += len(grp.TargetTicket)
			grp = nil
		}
		if grp == nil {
			grp = &srvpb.EdgeSet_Group{
				Kind: e.EdgeKind,
			}
		}

		grp.TargetTicket = append(grp.TargetTicket, kytheuri.ToString(e.Target))
		lastSrc = e.Source
		return nil
	}); err != nil {
		return fmt.Errorf("error scanning reverse edges table: %v", err)
	}

	// Flush the final PagedEdgeSet, if any.
	if pes != nil {
		if grp != nil {
			pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
			pesTotal += len(grp.TargetTicket)
		}
		pes.TotalEdges = int32(pesTotal)

		if err := t.Put(ctx, xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil {
			return err
		}
	}
	return nil
}