func main() { flag.Parse() if len(flag.Args()) > 0 { flagutil.UsageErrorf("unknown arguments: %v", flag.Args()) } in := os.Stdin var entries <-chan *spb.Entry if *readJSON { entries = stream.ReadJSONEntries(in) } else { entries = stream.ReadEntries(in) } if *sortStream || *entrySets { entries = sortEntries(entries) } encoder := json.NewEncoder(os.Stdout) wr := delimited.NewWriter(os.Stdout) var set entrySet entryCount := 0 for entry := range entries { if *countOnly { entryCount++ } else if *entrySets { if compare.VNamesEqual(set.Source, entry.Source) || !compare.VNamesEqual(set.Target, entry.Target) || set.EdgeKind != entry.EdgeKind { if len(set.Properties) != 0 { failOnErr(encoder.Encode(set)) } set.Source = entry.Source set.EdgeKind = entry.EdgeKind set.Target = entry.Target set.Properties = make(map[string]string) } set.Properties[entry.FactName] = string(entry.FactValue) } else if *writeJSON { failOnErr(encoder.Encode(entry)) } else { rec, err := proto.Marshal(entry) failOnErr(err) failOnErr(wr.Put(rec)) } } if len(set.Properties) != 0 { failOnErr(encoder.Encode(set)) } if *countOnly { fmt.Println(entryCount) } }
func collectNodes(nodeEntries <-chan *spb.Entry) <-chan *srvpb.Node { nodes := make(chan *srvpb.Node) go func() { var ( node *srvpb.Node vname *spb.VName ) for e := range nodeEntries { if node != nil && !compare.VNamesEqual(e.Source, vname) { nodes <- node node = nil vname = nil } if node == nil { vname = e.Source ticket := kytheuri.ToString(vname) node = &srvpb.Node{Ticket: ticket} } node.Fact = append(node.Fact, &srvpb.Node_Fact{ Name: e.FactName, Value: e.FactValue, }) } if node != nil { nodes <- node } close(nodes) }() return nodes }
// BatchWrites returns a channel of WriteRequests for the given entries. // Consecutive entries with the same Source will be collected in the same // WriteRequest, with each request containing up to maxSize updates. func BatchWrites(entries <-chan *spb.Entry, maxSize int) <-chan *spb.WriteRequest { ch := make(chan *spb.WriteRequest) go func() { defer close(ch) var req *spb.WriteRequest for entry := range entries { update := &spb.WriteRequest_Update{ EdgeKind: entry.EdgeKind, Target: entry.Target, FactName: entry.FactName, FactValue: entry.FactValue, } if req != nil && (!compare.VNamesEqual(req.Source, entry.Source) || len(req.Update) >= maxSize) { ch <- req req = nil } if req == nil { req = &spb.WriteRequest{ Source: entry.Source, Update: []*spb.WriteRequest_Update{update}, } } else { req.Update = append(req.Update, update) } } if req != nil { ch <- req } }() return ch }
// Sources constructs a new Source for every contiguous set of entries sharing // the same Source, calling f for each. func Sources(rd stream.EntryReader, f func(*ipb.Source) error) error { var source *spb.VName var src *ipb.Source if err := rd(func(entry *spb.Entry) error { if src != nil && !compare.VNamesEqual(source, entry.Source) { if err := f(src); err != nil { return err } src = nil } if src == nil { source = entry.Source src = &ipb.Source{ Ticket: kytheuri.ToString(entry.Source), Facts: make(map[string][]byte), EdgeGroups: make(map[string]*ipb.Source_EdgeGroup), } } AppendEntry(src, entry) return nil }); err != nil { return err } if src != nil { return f(src) } return nil }
func writeEdges(ctx context.Context, t table.Proto, edges <-chan *spb.Entry) error { tempDir, err := ioutil.TempDir("", "reverse.edges") if err != nil { return fmt.Errorf("failed to create temporary directory: %v", err) } defer func() { drainEntries(edges) // ensure channel is drained on errors log.Println("Removing temporary edges table", tempDir) if err := os.RemoveAll(tempDir); err != nil { log.Printf("Failed to remove temporary directory %q: %v", tempDir, err) } }() gs, err := leveldb.OpenGraphStore(tempDir, nil) if err != nil { return fmt.Errorf("failed to create temporary GraphStore: %v", err) } defer gs.Close(ctx) log.Println("Writing temporary reverse edges table") var writeReq *spb.WriteRequest for e := range edges { if writeReq != nil && !compare.VNamesEqual(e.Source, writeReq.Source) { if err := writeWithReverses(ctx, gs, writeReq); err != nil { return err } writeReq = nil } if writeReq == nil { writeReq = &spb.WriteRequest{Source: e.Source} } writeReq.Update = append(writeReq.Update, &spb.WriteRequest_Update{ Target: e.Target, EdgeKind: e.EdgeKind, FactName: e.FactName, FactValue: e.FactValue, }) } if writeReq != nil { if err := writeWithReverses(ctx, gs, writeReq); err != nil { return err } } return writeEdgePages(ctx, t, gs) }
func writeEdges(ctx context.Context, t table.Proto, edges <-chan *spb.Entry, maxEdgePageSize int) error { defer drainEntries(edges) // ensure channel is drained on errors temp, err := tempTable("edge.groups") if err != nil { return fmt.Errorf("failed to create temporary table: %v", err) } edgeGroups := &table.KVProto{temp} defer func() { if err := edgeGroups.Close(ctx); err != nil { log.Println("Error closing edge groups table: %v", err) } }() log.Println("Writing temporary edges table") var ( src *spb.VName kind string targets stringset.Set ) for e := range edges { if src != nil && (!compare.VNamesEqual(e.Source, src) || kind != e.EdgeKind) { if err := writeWithReverses(ctx, edgeGroups, kytheuri.ToString(src), kind, targets.Slice()); err != nil { return err } src = nil } if src == nil { src = e.Source kind = e.EdgeKind targets = stringset.New() } targets.Add(kytheuri.ToString(e.Target)) } if src != nil { if err := writeWithReverses(ctx, edgeGroups, kytheuri.ToString(src), kind, targets.Slice()); err != nil { return err } } return writeEdgePages(ctx, t, edgeGroups, maxEdgePageSize) }
func writeEdgePages(ctx context.Context, t table.Proto, gs graphstore.Service) error { // TODO(schroederc): spill large PagedEdgeSets into EdgePages log.Println("Writing EdgeSets") var ( lastSrc *spb.VName pes *srvpb.PagedEdgeSet grp *srvpb.EdgeSet_Group pesTotal int ) if err := gs.Scan(ctx, new(spb.ScanRequest), func(e *spb.Entry) error { if e.EdgeKind == "" { panic("non-edge entry") } if pes != nil && !compare.VNamesEqual(lastSrc, e.Source) { if grp != nil { pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp) pesTotal += len(grp.TargetTicket) } pes.TotalEdges = int32(pesTotal) if err := t.Put(xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil { return err } pes = nil grp = nil pesTotal = 0 } if pes == nil { pes = &srvpb.PagedEdgeSet{ EdgeSet: &srvpb.EdgeSet{ SourceTicket: kytheuri.ToString(e.Source), }, } } if grp != nil && grp.Kind != e.EdgeKind { pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp) pesTotal += len(grp.TargetTicket) grp = nil } if grp == nil { grp = &srvpb.EdgeSet_Group{ Kind: e.EdgeKind, } } grp.TargetTicket = append(grp.TargetTicket, kytheuri.ToString(e.Target)) lastSrc = e.Source return nil }); err != nil { return err } if pes != nil { if grp != nil { pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp) pesTotal += len(grp.TargetTicket) } pes.TotalEdges = int32(pesTotal) if err := t.Put(xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil { return err } } return nil }
func combineNodesAndEdges(ctx context.Context, opts *Options, out *servingOutput, gsEntries <-chan *spb.Entry) (disksort.Interface, error) { log.Println("Writing partial edges") tree := filetree.NewMap() partialSorter, err := opts.diskSorter(edgeLesser{}, edgeMarshaler{}) if err != nil { return nil, err } bIdx := out.idx.Buffered() var src *spb.VName var entries []*spb.Entry for e := range gsEntries { if e.FactName == schema.NodeKindFact && string(e.FactValue) == schema.FileKind { tree.AddFile(e.Source) // TODO(schroederc): evict finished directories (based on GraphStore order) } if src == nil { src = e.Source } else if !compare.VNamesEqual(e.Source, src) { if err := writePartialEdges(ctx, partialSorter, bIdx, assemble.SourceFromEntries(entries)); err != nil { drainEntries(gsEntries) return nil, err } src = e.Source entries = nil } entries = append(entries, e) } if len(entries) > 0 { if err := writePartialEdges(ctx, partialSorter, bIdx, assemble.SourceFromEntries(entries)); err != nil { return nil, err } } if err := bIdx.Flush(ctx); err != nil { return nil, err } if err := writeFileTree(ctx, tree, out.xs); err != nil { return nil, fmt.Errorf("error writing file tree: %v", err) } tree = nil log.Println("Writing complete edges") cSorter, err := opts.diskSorter(edgeLesser{}, edgeMarshaler{}) if err != nil { return nil, err } var n *srvpb.Node if err := partialSorter.Read(func(i interface{}) error { e := i.(*srvpb.Edge) if n == nil || n.Ticket != e.Source.Ticket { n = e.Source return cSorter.Add(e) } else if e.Target != nil { e.Source = n if err := writeCompletedEdges(ctx, cSorter, e); err != nil { return fmt.Errorf("error writing complete edge: %v", err) } } return nil }); err != nil { return nil, fmt.Errorf("error reading/writing edges: %v", err) } return cSorter, nil }
func main() { flag.Parse() if len(flag.Args()) > 0 { flagutil.UsageErrorf("unknown arguments: %v", flag.Args()) } in := bufio.NewReaderSize(os.Stdin, 2*4096) out := bufio.NewWriter(os.Stdout) var rd stream.EntryReader if *readJSON { rd = stream.NewJSONReader(in) } else { rd = stream.NewReader(in) } if *sortStream || *entrySets || *uniqEntries { var err error rd, err = sortEntries(rd) failOnErr(err) } if *uniqEntries { rd = dedupEntries(rd) } switch { case *countOnly: var count int failOnErr(rd(func(_ *spb.Entry) error { count++ return nil })) fmt.Println(count) case *entrySets: encoder := json.NewEncoder(out) var set entrySet failOnErr(rd(func(entry *spb.Entry) error { if !compare.VNamesEqual(set.Source, entry.Source) || !compare.VNamesEqual(set.Target, entry.Target) || set.EdgeKind != entry.EdgeKind { if len(set.Properties) != 0 { if err := encoder.Encode(set); err != nil { return err } } set.Source = entry.Source set.EdgeKind = entry.EdgeKind set.Target = entry.Target set.Properties = make(map[string]string) } set.Properties[entry.FactName] = string(entry.FactValue) return nil })) if len(set.Properties) != 0 { failOnErr(encoder.Encode(set)) } case *writeJSON: encoder := json.NewEncoder(out) failOnErr(rd(func(entry *spb.Entry) error { return encoder.Encode(entry) })) default: wr := delimited.NewWriter(out) failOnErr(rd(func(entry *spb.Entry) error { rec, err := proto.Marshal(entry) if err != nil { return err } return wr.Put(rec) })) } failOnErr(out.Flush()) }
// EntryMatchesScan reports whether entry belongs in the result set for req. func EntryMatchesScan(req *spb.ScanRequest, entry *spb.Entry) bool { return (req.GetTarget() == nil || compare.VNamesEqual(entry.Target, req.Target)) && (req.EdgeKind == "" || entry.EdgeKind == req.EdgeKind) && strings.HasPrefix(entry.FactName, req.FactPrefix) }
func combineNodesAndEdges(ctx context.Context, out *servingOutput, gsEntries <-chan *spb.Entry) error { log.Println("Writing partial edges") tree := filetree.NewMap() var src *spb.VName var entries []*spb.Entry for e := range gsEntries { if e.FactName == schema.NodeKindFact && string(e.FactValue) == schema.FileKind { tree.AddFile(e.Source) // TODO(schroederc): evict finished directories (based on GraphStore order) } if src == nil { src = e.Source } else if !compare.VNamesEqual(e.Source, src) { if err := writePartialEdges(ctx, out, assemble.SourceFromEntries(entries)); err != nil { drainEntries(gsEntries) return err } src = e.Source entries = nil } entries = append(entries, e) } if len(entries) > 0 { if err := writePartialEdges(ctx, out, assemble.SourceFromEntries(entries)); err != nil { return err } } if err := writeFileTree(ctx, tree, out.xs); err != nil { return fmt.Errorf("error writing file tree: %v", err) } tree = nil log.Println("Writing complete edges") snapshot := out.completeEdges.NewSnapshot() defer snapshot.Close() it, err := out.completeEdges.ScanPrefix(nil, &keyvalue.Options{ LargeRead: true, Snapshot: snapshot, }) if err != nil { return err } defer it.Close() var n *srvpb.Node var e srvpb.Edge for { k, v, err := it.Next() if err == io.EOF { break } else if err != nil { return fmt.Errorf("error scanning partial edges table: %v", err) } ss := strings.Split(string(k), tempTableKeySep) if len(ss) != 3 { return fmt.Errorf("invalid partial edge table key: %q", string(k)) } if err := proto.Unmarshal(v, &e); err != nil { return fmt.Errorf("invalid partial edge table value: %v", err) } if n == nil || n.Ticket != ss[0] { n = e.Source } else if e.Target != nil { e.Source = n if err := writeCompletedEdges(ctx, out.completeEdges, &e); err != nil { return fmt.Errorf("error writing complete edge: %v", err) } } } return nil }