func edgeKindLess(kind1, kind2 string) bool { // General ordering: // anchor edge kinds before non-anchor edge kinds // forward edges before reverse edges // edgeOrdering[i] (and variants) before edgeOrdering[i+1:] // edge variants after root edge kind (ordered lexicographically) // otherwise, order lexicographically if kind1 == kind2 { return false } else if a1, a2 := schema.IsAnchorEdge(kind1), schema.IsAnchorEdge(kind2); a1 != a2 { return a1 } else if d1, d2 := schema.EdgeDirection(kind1), schema.EdgeDirection(kind2); d1 != d2 { return d1 == schema.Forward } kind1, kind2 = schema.Canonicalize(kind1), schema.Canonicalize(kind2) for _, kind := range edgeOrdering { if kind1 == kind { return true } else if kind2 == kind { return false } else if v1, v2 := schema.IsEdgeVariant(kind1, kind), schema.IsEdgeVariant(kind2, kind); v1 != v2 { return v1 } else if v1 { return kind1 < kind2 } } return kind1 < kind2 }
func addReverseEdges(ctx context.Context, gs graphstore.Service) error { log.Println("Adding reverse edges") var ( totalEntries int addedEdges int ) startTime := time.Now() err := gs.Scan(ctx, new(spb.ScanRequest), func(entry *spb.Entry) error { kind := entry.EdgeKind if kind != "" && schema.EdgeDirection(kind) == schema.Forward { if err := gs.Write(ctx, &spb.WriteRequest{ Source: entry.Target, Update: []*spb.WriteRequest_Update{{ Target: entry.Source, EdgeKind: schema.MirrorEdge(kind), FactName: entry.FactName, FactValue: entry.FactValue, }}, }); err != nil { return fmt.Errorf("Failed to write reverse edge: %v", err) } addedEdges++ } totalEntries++ return nil }) log.Printf("Wrote %d reverse edges to GraphStore (%d total entries): %v", addedEdges, totalEntries, time.Since(startTime)) return err }
// EnsureReverseEdges checks if gs contains reverse edges. If it doesn't, it // will scan gs for all forward edges, adding a reverse for each back into the // GraphStore. This is necessary for a GraphStoreService to work properly. func EnsureReverseEdges(ctx context.Context, gs graphstore.Service) error { var edge *spb.Entry if err := gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error { if graphstore.IsEdge(e) { edge = e return io.EOF } return nil }); err != nil { return err } if edge == nil { log.Println("No edges found in GraphStore") return nil } else if schema.EdgeDirection(edge.EdgeKind) == schema.Reverse { return nil } var foundReverse bool if err := gs.Read(ctx, &spb.ReadRequest{ Source: edge.Target, EdgeKind: schema.MirrorEdge(edge.EdgeKind), }, func(entry *spb.Entry) error { foundReverse = true return nil }); err != nil { return fmt.Errorf("error checking for reverse edge: %v", err) } if foundReverse { return nil } return addReverseEdges(ctx, gs) }
func getDecorations(ctx context.Context, es xrefs.EdgesService, anchor *xpb.NodeInfo) ([]*srvpb.FileDecorations_Decoration, error) { var ( isAnchor bool start, end int err error ) for _, f := range anchor.Fact { switch f.Name { case schema.NodeKindFact: if string(f.Value) == schema.AnchorKind { isAnchor = true } case schema.AnchorStartFact: start, err = strconv.Atoi(string(f.Value)) if err != nil { return nil, fmt.Errorf("invalid anchor %q start offset: %q", anchor.Ticket, string(f.Value)) } case schema.AnchorEndFact: end, err = strconv.Atoi(string(f.Value)) if err != nil { return nil, fmt.Errorf("invalid anchor %q end offset: %q", anchor.Ticket, string(f.Value)) } } } if !isAnchor { return nil, nil } else if start > end { log.Printf("Invalid anchor span %d:%d for %q", start, end, anchor.Ticket) return nil, nil } edges, err := es.Edges(ctx, &xpb.EdgesRequest{Ticket: []string{anchor.Ticket}}) if err != nil { return nil, err } if len(edges.EdgeSet) != 1 { return nil, fmt.Errorf("invalid number of EdgeSets returned for anchor: %d", len(edges.EdgeSet)) } a := &srvpb.FileDecorations_Decoration_Anchor{ Ticket: anchor.Ticket, StartOffset: int32(start), EndOffset: int32(end), } var ds []*srvpb.FileDecorations_Decoration for _, grp := range edges.EdgeSet[0].Group { if schema.EdgeDirection(grp.Kind) == schema.Forward && grp.Kind != schema.ChildOfEdge { for _, target := range grp.TargetTicket { ds = append(ds, &srvpb.FileDecorations_Decoration{ Anchor: a, Kind: grp.Kind, TargetTicket: target, }) } } } return ds, nil }
func filterReverses(rd stream.EntryReader) stream.EntryReader { return func(f func(*spb.Entry) error) error { return rd(func(e *spb.Entry) error { if graphstore.IsNodeFact(e) || schema.EdgeDirection(e.EdgeKind) == schema.Forward { return f(e) } return nil }) } }
// ToString returns a human-readable string representation of p. func ToString(p *ipb.Path) string { s := "**" + p.Pivot.NodeKind + "**" for _, e := range p.Edges { if schema.EdgeDirection(e.Kind) == schema.Forward { s += fmt.Sprintf(" -[%s]> %s", e.Kind, e.Target.NodeKind) } else { s += fmt.Sprintf(" <[%s]- %s", schema.MirrorEdge(e.Kind), e.Target.NodeKind) } } return s }
// expandEdgeKind prefixes unrooted (not starting with "/") edge kinds with the // standard Kythe edge prefix ("/kythe/edge/"). func expandEdgeKind(kind string) string { ck := schema.Canonicalize(kind) if strings.HasPrefix(ck, "/") { return kind } expansion := schema.EdgePrefix + ck if schema.EdgeDirection(kind) == schema.Reverse { return schema.MirrorEdge(expansion) } return expansion }
// FromSource creates a set of *ipb.Paths for each of its edges as well as a // single *ipb.Path with only its pivot set to be the source node. func FromSource(src *ipb.Source) []*ipb.Path { var paths []*ipb.Path n := specialize(assemble.Node(src)) paths = append(paths, &ipb.Path{Pivot: n}) for kind, group := range src.EdgeGroups { if schema.EdgeDirection(kind) != schema.Forward { continue } for _, tgt := range group.Edges { paths = append(paths, &ipb.Path{ Pivot: &ipb.Path_Node{ Ticket: tgt.Ticket, }, Edges: []*ipb.Path_Edge{{ Kind: schema.MirrorEdge(kind), Ordinal: int32(tgt.Ordinal), Target: n, }}, }) } } return paths }
// Run writes the xrefs and filetree serving tables to db based on the given // graphstore.Service. func Run(ctx context.Context, gs graphstore.Service, db keyvalue.DB, opts *Options) error { if opts == nil { opts = new(Options) } log.Println("Starting serving pipeline") out := &servingOutput{ xs: table.ProtoBatchParallel{&table.KVProto{DB: db}}, idx: &table.KVInverted{DB: db}, } entries := make(chan *spb.Entry, chBuf) var cErr error var wg sync.WaitGroup var sortedEdges disksort.Interface wg.Add(1) go func() { sortedEdges, cErr = combineNodesAndEdges(ctx, opts, out, entries) if cErr != nil { cErr = fmt.Errorf("error combining nodes and edges: %v", cErr) } wg.Done() }() err := gs.Scan(ctx, &spb.ScanRequest{}, func(e *spb.Entry) error { if graphstore.IsNodeFact(e) || schema.EdgeDirection(e.EdgeKind) == schema.Forward { entries <- e } return nil }) close(entries) if err != nil { return fmt.Errorf("error scanning GraphStore: %v", err) } wg.Wait() if cErr != nil { return cErr } pesIn, dIn := make(chan *srvpb.Edge, chBuf), make(chan *srvpb.Edge, chBuf) var pErr, fErr error wg.Add(2) go func() { defer wg.Done() if err := writePagedEdges(ctx, pesIn, out.xs, opts); err != nil { pErr = fmt.Errorf("error writing paged edge sets: %v", err) } }() go func() { defer wg.Done() if err := writeDecorAndRefs(ctx, opts, dIn, out); err != nil { fErr = fmt.Errorf("error writing file decorations: %v", err) } }() err = sortedEdges.Read(func(x interface{}) error { e := x.(*srvpb.Edge) pesIn <- e dIn <- e return nil }) close(pesIn) close(dIn) if err != nil { return fmt.Errorf("error reading edges table: %v", err) } wg.Wait() if pErr != nil { return pErr } return fErr }
// Decorations implements part of the Service interface.
//
// The request must carry a location and must not carry a dirty buffer.  The
// reply always contains the normalized location.  When req.SourceText is set
// it also contains the file text (or the requested slice of it) and its
// encoding; when req.References is set it contains one reference per forward
// edge of every anchor child of the file, sorted with bySpan, along with the
// nodes that pass the request's filters.
func (g *GraphStoreService) Decorations(ctx context.Context, req *xpb.DecorationsRequest) (*xpb.DecorationsReply, error) {
	if len(req.DirtyBuffer) > 0 {
		return nil, errors.New("UNIMPLEMENTED: dirty buffers")
	} else if req.GetLocation() == nil {
		// TODO(schroederc): allow empty location when given dirty buffer
		return nil, errors.New("missing location")
	}

	fileVName, err := kytheuri.ToVName(req.Location.Ticket)
	if err != nil {
		return nil, fmt.Errorf("invalid file ticket %q: %v", req.Location.Ticket, err)
	}

	text, encoding, err := getSourceText(ctx, g.gs, fileVName)
	if err != nil {
		return nil, fmt.Errorf("failed to retrieve file text: %v", err)
	}
	// The normalizer is built from the file text and is used both to normalize
	// the requested location and to convert anchor byte offsets below.
	norm := xrefs.NewNormalizer(text)

	loc, err := norm.Location(req.GetLocation())
	if err != nil {
		return nil, err
	}

	reply := &xpb.DecorationsReply{
		Location: loc,
		Nodes:    make(map[string]*xpb.NodeInfo),
	}

	// Handle DecorationsRequest.SourceText switch
	if req.SourceText {
		if loc.Kind == xpb.Location_FILE {
			reply.SourceText = text
		} else {
			// Any non-FILE location returns only the bytes inside the window.
			reply.SourceText = text[loc.Start.ByteOffset:loc.End.ByteOffset]
		}
		reply.Encoding = encoding
	}

	// Handle DecorationsRequest.References switch
	if req.References {
		// Traverse the following chain of edges:
		//   file --%/kythe/edge/childof-> []anchor --forwardEdgeKind-> []target
		//
		// Add []anchor and []target nodes to reply.Nodes
		// Add all {anchor, forwardEdgeKind, target} tuples to reply.Reference

		patterns := xrefs.ConvertFilters(req.Filter)

		children, err := getEdges(ctx, g.gs, fileVName, func(e *spb.Entry) bool {
			return e.EdgeKind == revChildOfEdgeKind
		})
		if err != nil {
			return nil, fmt.Errorf("failed to retrieve file children: %v", err)
		}

		// Tickets of every reference target seen; used for the batched node
		// lookup at the end.
		targetSet := stringset.New()
		for _, edge := range children {
			anchor := edge.Target
			ticket := kytheuri.ToString(anchor)
			// One Nodes request is issued per child of the file.
			anchorNodeReply, err := g.Nodes(ctx, &xpb.NodesRequest{
				Ticket: []string{ticket},
			})
			if err != nil {
				return nil, fmt.Errorf("failure getting reference source node: %v", err)
			} else if len(anchorNodeReply.Nodes) != 1 {
				return nil, fmt.Errorf("found %d nodes for {%+v}", len(anchorNodeReply.Nodes), anchor)
			}

			node, ok := xrefs.NodesMap(anchorNodeReply.Nodes)[ticket]
			if !ok {
				return nil, fmt.Errorf("failed to find info for node %q", ticket)
			} else if string(node[schema.NodeKindFact]) != schema.AnchorKind {
				// Skip child if it isn't an anchor node
				continue
			}

			// Anchors with unparsable offsets are logged and skipped rather than
			// failing the whole request.
			anchorStart, err := strconv.Atoi(string(node[schema.AnchorStartFact]))
			if err != nil {
				log.Printf("Invalid anchor start offset %q for node %q: %v", node[schema.AnchorStartFact], ticket, err)
				continue
			}
			anchorEnd, err := strconv.Atoi(string(node[schema.AnchorEndFact]))
			if err != nil {
				log.Printf("Invalid anchor end offset %q for node %q: %v", node[schema.AnchorEndFact], ticket, err)
				continue
			}

			if loc.Kind == xpb.Location_SPAN {
				// Check if anchor fits within/around requested source text window
				if !xrefs.InSpanBounds(req.SpanKind, int32(anchorStart), int32(anchorEnd), loc.Start.ByteOffset, loc.End.ByteOffset) {
					continue
				} else if anchorStart > anchorEnd {
					// NOTE(review): this start > end validity check only runs
					// for SPAN requests, and only after the bounds check; for
					// other location kinds such anchors are not rejected here.
					// Confirm whether that is intended.
					log.Printf("Invalid anchor offset span %d:%d", anchorStart, anchorEnd)
					continue
				}
			}

			targets, err := getEdges(ctx, g.gs, anchor, func(e *spb.Entry) bool {
				return schema.EdgeDirection(e.EdgeKind) == schema.Forward && e.EdgeKind != schema.ChildOfEdge
			})
			if err != nil {
				return nil, fmt.Errorf("failed to retrieve targets of anchor %v: %v", anchor, err)
			}
			if len(targets) == 0 {
				log.Printf("Anchor missing forward edges: {%+v}", anchor)
				continue
			}

			// filterNode returning nil leaves the anchor out of reply.Nodes; its
			// references are still added below.
			if node := filterNode(patterns, anchorNodeReply.Nodes[ticket]); node != nil {
				reply.Nodes[ticket] = node
			}
			for _, edge := range targets {
				targetTicket := kytheuri.ToString(edge.Target)
				targetSet.Add(targetTicket)
				reply.Reference = append(reply.Reference, &xpb.DecorationsReply_Reference{
					SourceTicket: ticket,
					Kind:         edge.Kind,
					TargetTicket: targetTicket,
					AnchorStart:  norm.ByteOffset(int32(anchorStart)),
					AnchorEnd:    norm.ByteOffset(int32(anchorEnd)),
				})
			}
		}
		sort.Sort(bySpan(reply.Reference))

		// Only request Nodes when there are fact filters given.
		if len(req.Filter) > 0 {
			// Ensure returned nodes are not duplicated.
			for ticket := range reply.Nodes {
				targetSet.Remove(ticket)
			}

			// Batch request all Reference target nodes
			nodesReply, err := g.Nodes(ctx, &xpb.NodesRequest{
				Ticket: targetSet.Slice(),
				Filter: req.Filter,
			})
			if err != nil {
				return nil, fmt.Errorf("failure getting reference target nodes: %v", err)
			}
			for ticket, node := range nodesReply.Nodes {
				reply.Nodes[ticket] = node
			}
		}
	}

	return reply, nil
}
func displayEdgeGraph(reply *xpb.EdgesReply) error { nodes := xrefs.NodesMap(reply.Nodes) edges := make(map[string]map[string]stringset.Set) for source, es := range reply.EdgeSets { for gKind, g := range es.Groups { for _, edge := range g.Edge { tgt := edge.TargetTicket src, kind := source, gKind if schema.EdgeDirection(kind) == schema.Reverse { src, kind, tgt = tgt, schema.MirrorEdge(kind), src } groups, ok := edges[src] if !ok { groups = make(map[string]stringset.Set) edges[src] = groups } targets, ok := groups[kind] if !ok { targets = stringset.New() groups[kind] = targets } targets.Add(tgt) } } } if _, err := fmt.Println("digraph kythe {"); err != nil { return err } for ticket, node := range nodes { if _, err := fmt.Printf(` %q [label=<<table><tr><td colspan="2">%s</td></tr>`, ticket, html.EscapeString(ticket)); err != nil { return err } var facts []string for fact := range node { facts = append(facts, fact) } sort.Strings(facts) for _, fact := range facts { if _, err := fmt.Printf("<tr><td>%s</td><td>%s</td></tr>", html.EscapeString(fact), html.EscapeString(string(node[fact]))); err != nil { return err } } if _, err := fmt.Println("</table>> shape=plaintext];"); err != nil { return err } } if _, err := fmt.Println(); err != nil { return err } for src, groups := range edges { for kind, targets := range groups { for tgt := range targets { if _, err := fmt.Printf("\t%q -> %q [label=%q];\n", src, tgt, kind); err != nil { return err } } } } if _, err := fmt.Println("}"); err != nil { return err } return nil }
func main() { flag.Parse() if len(flag.Args()) > 2 || (gs != nil && len(flag.Args()) > 1) { fmt.Fprintf(os.Stderr, "ERROR: too many arguments %v\n", flag.Args()) flag.Usage() os.Exit(1) } if gs != nil { defer gsutil.LogClose(context.Background(), gs) } var in io.ReadCloser = os.Stdin if gs == nil && len(flag.Args()) > 0 { file, err := vfs.Open(context.Background(), flag.Arg(0)) if err != nil { log.Fatalf("Failed to open input file %q: %v", flag.Arg(0), err) } defer file.Close() in = file } outIdx := 1 if gs != nil { outIdx = 0 } var out io.WriteCloser = os.Stdout if len(flag.Args()) > outIdx { file, err := vfs.Create(context.Background(), flag.Arg(outIdx)) if err != nil { log.Fatalf("Failed to create output file %q: %v", flag.Arg(outIdx), err) } defer file.Close() out = file } var ( entries <-chan *spb.Entry reverseEdges int triples int ) if gs == nil { entries = stream.ReadEntries(in) } else { ch := make(chan *spb.Entry) entries = ch go func() { defer close(ch) if err := gs.Scan(context.Background(), &spb.ScanRequest{}, func(e *spb.Entry) error { ch <- e return nil }); err != nil { log.Fatalf("Error scanning graphstore: %v", err) } }() } for entry := range entries { if schema.EdgeDirection(entry.EdgeKind) == schema.Reverse && !*keepReverseEdges { reverseEdges++ continue } t, err := toTriple(entry) if err != nil { log.Fatal(err) } fmt.Fprintln(out, t) triples++ } if !*quiet { if !*keepReverseEdges { log.Printf("Skipped %d reverse edges", reverseEdges) } log.Printf("Wrote %d triples", triples) } }
func (d *DB) copyEntries(entries <-chan *spb.Entry) error { // Start a transaction for a COPY statement per table nodesTx, err := d.Begin() if err != nil { return err } edgesTx, err := d.Begin() if err != nil { return err } // Create each table in their corresponding transactions to speed up COPY if _, err := nodesTx.Exec(createNodesTable); err != nil { return fmt.Errorf("error truncating Nodes table: %v", err) } else if _, err := edgesTx.Exec(createEdgeTable); err != nil { return fmt.Errorf("error truncating Edges table: %v", err) } copyNode, err := nodesTx.Prepare(pq.CopyIn( "nodes", "ticket", "node_kind", "subkind", "text", "text_encoding", "start_offset", "end_offset", "snippet_start", "snippet_end", "other_facts_num", "other_facts", )) if err != nil { return fmt.Errorf("error preparing Nodes copy: %v", err) } copyEdge, err := edgesTx.Prepare(pq.CopyIn( "edges", "source", "kind", "target", "ordinal", )) if err != nil { return fmt.Errorf("error preparing Edges copy: %v", err) } var node srvpb.Node var nodeKind string var subkind, textEncoding *string var text *[]byte var startOffset, endOffset, snippetStart, snippetEnd *int64 for e := range entries { if graphstore.IsNodeFact(e) { ticket := kytheuri.ToString(e.Source) if node.Ticket != "" && node.Ticket != ticket { nodeTicket := node.Ticket node.Ticket = "" var rec []byte if len(node.Fact) > 0 { rec, err = proto.Marshal(&node) if err != nil { return fmt.Errorf("error marshaling facts: %v", err) } } if text != nil && textEncoding == nil { textEncoding = proto.String(schema.DefaultTextEncoding) } if _, err := copyNode.Exec( nodeTicket, nodeKind, subkind, text, textEncoding, startOffset, endOffset, snippetStart, snippetEnd, len(node.Fact), rec, ); err != nil { return fmt.Errorf("error copying node: %v", err) } node.Fact, text = node.Fact[0:0], nil nodeKind = "" subkind, textEncoding = nil, nil startOffset, endOffset, snippetStart, snippetEnd = nil, nil, nil, nil } if node.Ticket == "" { node.Ticket = ticket } switch 
e.FactName { case schema.NodeKindFact: nodeKind = string(e.FactValue) case schema.SubkindFact: subkind = proto.String(string(e.FactValue)) case schema.TextFact: text = &e.FactValue case schema.TextEncodingFact: textEncoding = proto.String(string(e.FactValue)) case schema.AnchorStartFact: n, err := strconv.ParseInt(string(e.FactValue), 10, 64) if err == nil { startOffset = proto.Int64(n) } case schema.AnchorEndFact: n, err := strconv.ParseInt(string(e.FactValue), 10, 64) if err == nil { endOffset = proto.Int64(n) } case schema.SnippetStartFact: n, err := strconv.ParseInt(string(e.FactValue), 10, 64) if err == nil { snippetStart = proto.Int64(n) } case schema.SnippetEndFact: n, err := strconv.ParseInt(string(e.FactValue), 10, 64) if err == nil { snippetEnd = proto.Int64(n) } default: node.Fact = append(node.Fact, &cpb.Fact{ Name: e.FactName, Value: e.FactValue, }) } } else if schema.EdgeDirection(e.EdgeKind) == schema.Forward { kind, ordinal, _ := schema.ParseOrdinal(e.EdgeKind) ticket := kytheuri.ToString(e.Source) if _, err := copyEdge.Exec(ticket, kind, kytheuri.ToString(e.Target), ordinal); err != nil { return fmt.Errorf("error copying edge: %v", err) } } } if _, err := copyNode.Exec(); err != nil { return fmt.Errorf("error flushing nodes: %v", err) } else if _, err := copyEdge.Exec(); err != nil { return fmt.Errorf("error flushing edges: %v", err) } if err := nodesTx.Commit(); err != nil { return fmt.Errorf("error committing Nodes transaction: %v", err) } else if err := edgesTx.Commit(); err != nil { return fmt.Errorf("error committing Edges transaction: %v", err) } return nil }