Example #1
func main() {
	flag.Parse()
	if len(flag.Args()) > 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	in := os.Stdin
	var entries <-chan *spb.Entry
	if *readJSON {
		entries = stream.ReadJSONEntries(in)
	} else {
		entries = stream.ReadEntries(in)
	}
	if *sortStream || *entrySets {
		entries = sortEntries(entries)
	}

	encoder := json.NewEncoder(os.Stdout)
	wr := delimited.NewWriter(os.Stdout)

	var set entrySet
	entryCount := 0
	for entry := range entries {
		if *countOnly {
			entryCount++
		} else if *entrySets {
			if !compare.VNamesEqual(set.Source, entry.Source) || !compare.VNamesEqual(set.Target, entry.Target) || set.EdgeKind != entry.EdgeKind {
				if len(set.Properties) != 0 {
					failOnErr(encoder.Encode(set))
				}
				set.Source = entry.Source
				set.EdgeKind = entry.EdgeKind
				set.Target = entry.Target
				set.Properties = make(map[string]string)
			}
			set.Properties[entry.FactName] = string(entry.FactValue)
		} else if *writeJSON {
			failOnErr(encoder.Encode(entry))
		} else {
			rec, err := proto.Marshal(entry)
			failOnErr(err)
			failOnErr(wr.Put(rec))
		}
	}
	if len(set.Properties) != 0 {
		failOnErr(encoder.Encode(set))
	}
	if *countOnly {
		fmt.Println(entryCount)
	}
}
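The example above leans on a couple of helpers that are not shown. A minimal sketch of what they might look like; the JSON tags are chosen here for illustration, not necessarily the tool's exact definitions:

// entrySet groups all facts that share a (source, edge kind, target) triple.
type entrySet struct {
	Source   *spb.VName `json:"source"`
	EdgeKind string     `json:"edge_kind,omitempty"`
	Target   *spb.VName `json:"target,omitempty"`

	Properties map[string]string `json:"properties"`
}

// failOnErr aborts the program on the first non-nil error.
func failOnErr(err error) {
	if err != nil {
		log.Fatal(err)
	}
}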
Example #2
func collectNodes(nodeEntries <-chan *spb.Entry) <-chan *srvpb.Node {
	nodes := make(chan *srvpb.Node)
	go func() {
		var (
			node  *srvpb.Node
			vname *spb.VName
		)
		for e := range nodeEntries {
			if node != nil && !compare.VNamesEqual(e.Source, vname) {
				nodes <- node
				node = nil
				vname = nil
			}
			if node == nil {
				vname = e.Source
				ticket := kytheuri.ToString(vname)
				node = &srvpb.Node{Ticket: ticket}
			}
			node.Fact = append(node.Fact, &srvpb.Node_Fact{
				Name:  e.FactName,
				Value: e.FactValue,
			})
		}
		if node != nil {
			nodes <- node
		}
		close(nodes)
	}()
	return nodes
}
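A possible consumer of collectNodes; the nodeEntries channel and the logging are illustrative only:

// Hypothetical usage: stream the assembled nodes and report their fact counts.
for node := range collectNodes(nodeEntries) {
	log.Printf("node %s carries %d facts", node.Ticket, len(node.Fact))
}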
Example #3
// BatchWrites returns a channel of WriteRequests for the given entries.
// Consecutive entries with the same Source will be collected in the same
// WriteRequest, with each request containing up to maxSize updates.
func BatchWrites(entries <-chan *spb.Entry, maxSize int) <-chan *spb.WriteRequest {
	ch := make(chan *spb.WriteRequest)
	go func() {
		defer close(ch)
		var req *spb.WriteRequest
		for entry := range entries {
			update := &spb.WriteRequest_Update{
				EdgeKind:  entry.EdgeKind,
				Target:    entry.Target,
				FactName:  entry.FactName,
				FactValue: entry.FactValue,
			}

			if req != nil && (!compare.VNamesEqual(req.Source, entry.Source) || len(req.Update) >= maxSize) {
				ch <- req
				req = nil
			}

			if req == nil {
				req = &spb.WriteRequest{
					Source: entry.Source,
					Update: []*spb.WriteRequest_Update{update},
				}
			} else {
				req.Update = append(req.Update, update)
			}
		}
		if req != nil {
			ch <- req
		}
	}()
	return ch
}
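A sketch of how BatchWrites might be driven, assuming a graphstore.Service value gs whose Write method takes a *spb.WriteRequest (as in the other examples on this page); the batch size of 64 is arbitrary:

// Hypothetical driver: group consecutive same-source entries into requests of
// at most 64 updates and write each batch to the store.
for req := range BatchWrites(entries, 64) {
	if err := gs.Write(ctx, req); err != nil {
		log.Fatalf("GraphStore write error: %v", err)
	}
}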
Example #4
// Sources constructs a new Source for every contiguous set of entries sharing
// the same Source, calling f for each.
func Sources(rd stream.EntryReader, f func(*ipb.Source) error) error {
	var source *spb.VName
	var src *ipb.Source
	if err := rd(func(entry *spb.Entry) error {
		if src != nil && !compare.VNamesEqual(source, entry.Source) {
			if err := f(src); err != nil {
				return err
			}
			src = nil
		}
		if src == nil {
			source = entry.Source
			src = &ipb.Source{
				Ticket:     kytheuri.ToString(entry.Source),
				Facts:      make(map[string][]byte),
				EdgeGroups: make(map[string]*ipb.Source_EdgeGroup),
			}
		}
		AppendEntry(src, entry)
		return nil
	}); err != nil {
		return err
	}
	if src != nil {
		return f(src)
	}
	return nil
}
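A sketch of a caller, assuming rd is a stream.EntryReader over entries already sorted in GraphStore order (the precondition for contiguous sources); the log output is illustrative:

// Hypothetical usage: log a summary line for each assembled Source.
if err := Sources(rd, func(src *ipb.Source) error {
	log.Printf("source %s: %d facts, %d edge groups",
		src.Ticket, len(src.Facts), len(src.EdgeGroups))
	return nil
}); err != nil {
	log.Fatal(err)
}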
Example #5
func writeEdges(ctx context.Context, t table.Proto, edges <-chan *spb.Entry) error {
	tempDir, err := ioutil.TempDir("", "reverse.edges")
	if err != nil {
		return fmt.Errorf("failed to create temporary directory: %v", err)
	}
	defer func() {
		drainEntries(edges) // ensure channel is drained on errors
		log.Println("Removing temporary edges table", tempDir)
		if err := os.RemoveAll(tempDir); err != nil {
			log.Printf("Failed to remove temporary directory %q: %v", tempDir, err)
		}
	}()
	gs, err := leveldb.OpenGraphStore(tempDir, nil)
	if err != nil {
		return fmt.Errorf("failed to create temporary GraphStore: %v", err)
	}
	defer gs.Close(ctx)

	log.Println("Writing temporary reverse edges table")
	var writeReq *spb.WriteRequest
	for e := range edges {
		if writeReq != nil && !compare.VNamesEqual(e.Source, writeReq.Source) {
			if err := writeWithReverses(ctx, gs, writeReq); err != nil {
				return err
			}
			writeReq = nil
		}
		if writeReq == nil {
			writeReq = &spb.WriteRequest{Source: e.Source}
		}
		writeReq.Update = append(writeReq.Update, &spb.WriteRequest_Update{
			Target:    e.Target,
			EdgeKind:  e.EdgeKind,
			FactName:  e.FactName,
			FactValue: e.FactValue,
		})
	}
	if writeReq != nil {
		if err := writeWithReverses(ctx, gs, writeReq); err != nil {
			return err
		}
	}

	return writeEdgePages(ctx, t, gs)
}
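Several of these examples defer a drainEntries call so the producing goroutine is never left blocked when the function returns early. A minimal sketch of such a helper, assuming it only needs to discard whatever remains on the channel:

// drainEntries consumes and discards all remaining entries so the sender can
// finish and close the channel.
func drainEntries(entries <-chan *spb.Entry) {
	for range entries {
	}
}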
Example #6
func writeEdges(ctx context.Context, t table.Proto, edges <-chan *spb.Entry, maxEdgePageSize int) error {
	defer drainEntries(edges) // ensure channel is drained on errors

	temp, err := tempTable("edge.groups")
	if err != nil {
		return fmt.Errorf("failed to create temporary table: %v", err)
	}
	edgeGroups := &table.KVProto{temp}
	defer func() {
		if err := edgeGroups.Close(ctx); err != nil {
			log.Println("Error closing edge groups table: %v", err)
		}
	}()

	log.Println("Writing temporary edges table")

	var (
		src     *spb.VName
		kind    string
		targets stringset.Set
	)
	for e := range edges {
		if src != nil && (!compare.VNamesEqual(e.Source, src) || kind != e.EdgeKind) {
			if err := writeWithReverses(ctx, edgeGroups, kytheuri.ToString(src), kind, targets.Slice()); err != nil {
				return err
			}
			src = nil
		}
		if src == nil {
			src = e.Source
			kind = e.EdgeKind
			targets = stringset.New()
		}
		targets.Add(kytheuri.ToString(e.Target))
	}
	if src != nil {
		if err := writeWithReverses(ctx, edgeGroups, kytheuri.ToString(src), kind, targets.Slice()); err != nil {
			return err
		}
	}

	return writeEdgePages(ctx, t, edgeGroups, maxEdgePageSize)
}
Example #7
func writeEdgePages(ctx context.Context, t table.Proto, gs graphstore.Service) error {
	// TODO(schroederc): spill large PagedEdgeSets into EdgePages
	log.Println("Writing EdgeSets")
	var (
		lastSrc  *spb.VName
		pes      *srvpb.PagedEdgeSet
		grp      *srvpb.EdgeSet_Group
		pesTotal int
	)
	if err := gs.Scan(ctx, new(spb.ScanRequest), func(e *spb.Entry) error {
		if e.EdgeKind == "" {
			panic("non-edge entry")
		}

		if pes != nil && !compare.VNamesEqual(lastSrc, e.Source) {
			if grp != nil {
				pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
				pesTotal += len(grp.TargetTicket)
			}
			pes.TotalEdges = int32(pesTotal)
			if err := t.Put(xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil {
				return err
			}
			pes = nil
			grp = nil
			pesTotal = 0
		}
		if pes == nil {
			pes = &srvpb.PagedEdgeSet{
				EdgeSet: &srvpb.EdgeSet{
					SourceTicket: kytheuri.ToString(e.Source),
				},
			}
		}

		if grp != nil && grp.Kind != e.EdgeKind {
			pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
			pesTotal += len(grp.TargetTicket)
			grp = nil
		}
		if grp == nil {
			grp = &srvpb.EdgeSet_Group{
				Kind: e.EdgeKind,
			}
		}

		grp.TargetTicket = append(grp.TargetTicket, kytheuri.ToString(e.Target))
		lastSrc = e.Source
		return nil
	}); err != nil {
		return err
	}
	if pes != nil {
		if grp != nil {
			pes.EdgeSet.Group = append(pes.EdgeSet.Group, grp)
			pesTotal += len(grp.TargetTicket)
		}
		pes.TotalEdges = int32(pesTotal)
		if err := t.Put(xsrv.EdgeSetKey(pes.EdgeSet.SourceTicket), pes); err != nil {
			return err
		}
	}
	return nil
}
Example #8
func combineNodesAndEdges(ctx context.Context, opts *Options, out *servingOutput, gsEntries <-chan *spb.Entry) (disksort.Interface, error) {
	log.Println("Writing partial edges")

	tree := filetree.NewMap()

	partialSorter, err := opts.diskSorter(edgeLesser{}, edgeMarshaler{})
	if err != nil {
		return nil, err
	}

	bIdx := out.idx.Buffered()
	var src *spb.VName
	var entries []*spb.Entry
	for e := range gsEntries {
		if e.FactName == schema.NodeKindFact && string(e.FactValue) == schema.FileKind {
			tree.AddFile(e.Source)
			// TODO(schroederc): evict finished directories (based on GraphStore order)
		}

		if src == nil {
			src = e.Source
		} else if !compare.VNamesEqual(e.Source, src) {
			if err := writePartialEdges(ctx, partialSorter, bIdx, assemble.SourceFromEntries(entries)); err != nil {
				drainEntries(gsEntries)
				return nil, err
			}
			src = e.Source
			entries = nil
		}

		entries = append(entries, e)
	}
	if len(entries) > 0 {
		if err := writePartialEdges(ctx, partialSorter, bIdx, assemble.SourceFromEntries(entries)); err != nil {
			return nil, err
		}
	}
	if err := bIdx.Flush(ctx); err != nil {
		return nil, err
	}

	if err := writeFileTree(ctx, tree, out.xs); err != nil {
		return nil, fmt.Errorf("error writing file tree: %v", err)
	}
	tree = nil

	log.Println("Writing complete edges")

	cSorter, err := opts.diskSorter(edgeLesser{}, edgeMarshaler{})
	if err != nil {
		return nil, err
	}

	var n *srvpb.Node
	if err := partialSorter.Read(func(i interface{}) error {
		e := i.(*srvpb.Edge)
		if n == nil || n.Ticket != e.Source.Ticket {
			n = e.Source
			return cSorter.Add(e)
		} else if e.Target != nil {
			e.Source = n
			if err := writeCompletedEdges(ctx, cSorter, e); err != nil {
				return fmt.Errorf("error writing complete edge: %v", err)
			}
		}
		return nil
	}); err != nil {
		return nil, fmt.Errorf("error reading/writing edges: %v", err)
	}

	return cSorter, nil
}
Example #9
func main() {
	flag.Parse()
	if len(flag.Args()) > 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	in := bufio.NewReaderSize(os.Stdin, 2*4096)
	out := bufio.NewWriter(os.Stdout)

	var rd stream.EntryReader
	if *readJSON {
		rd = stream.NewJSONReader(in)
	} else {
		rd = stream.NewReader(in)
	}

	if *sortStream || *entrySets || *uniqEntries {
		var err error
		rd, err = sortEntries(rd)
		failOnErr(err)
	}

	if *uniqEntries {
		rd = dedupEntries(rd)
	}

	switch {
	case *countOnly:
		var count int
		failOnErr(rd(func(_ *spb.Entry) error {
			count++
			return nil
		}))
		fmt.Println(count)
	case *entrySets:
		encoder := json.NewEncoder(out)
		var set entrySet
		failOnErr(rd(func(entry *spb.Entry) error {
			if !compare.VNamesEqual(set.Source, entry.Source) || !compare.VNamesEqual(set.Target, entry.Target) || set.EdgeKind != entry.EdgeKind {
				if len(set.Properties) != 0 {
					if err := encoder.Encode(set); err != nil {
						return err
					}
				}
				set.Source = entry.Source
				set.EdgeKind = entry.EdgeKind
				set.Target = entry.Target
				set.Properties = make(map[string]string)
			}
			set.Properties[entry.FactName] = string(entry.FactValue)
			return nil
		}))
		if len(set.Properties) != 0 {
			failOnErr(encoder.Encode(set))
		}
	case *writeJSON:
		encoder := json.NewEncoder(out)
		failOnErr(rd(func(entry *spb.Entry) error {
			return encoder.Encode(entry)
		}))
	default:
		wr := delimited.NewWriter(out)
		failOnErr(rd(func(entry *spb.Entry) error {
			rec, err := proto.Marshal(entry)
			if err != nil {
				return err
			}
			return wr.Put(rec)
		}))
	}
	failOnErr(out.Flush())
}
Example #10
// EntryMatchesScan reports whether entry belongs in the result set for req.
func EntryMatchesScan(req *spb.ScanRequest, entry *spb.Entry) bool {
	return (req.GetTarget() == nil || compare.VNamesEqual(entry.Target, req.Target)) &&
		(req.EdgeKind == "" || entry.EdgeKind == req.EdgeKind) &&
		strings.HasPrefix(entry.FactName, req.FactPrefix)
}
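A sketch of how EntryMatchesScan might be applied while serving a ScanRequest; allEntries stands in for whatever storage iteration the caller actually uses:

// Hypothetical filter: keep only the entries that belong in the scan result.
var matched []*spb.Entry
for _, entry := range allEntries {
	if EntryMatchesScan(req, entry) {
		matched = append(matched, entry)
	}
}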
Example #11
func combineNodesAndEdges(ctx context.Context, out *servingOutput, gsEntries <-chan *spb.Entry) error {
	log.Println("Writing partial edges")

	tree := filetree.NewMap()

	var src *spb.VName
	var entries []*spb.Entry
	for e := range gsEntries {
		if e.FactName == schema.NodeKindFact && string(e.FactValue) == schema.FileKind {
			tree.AddFile(e.Source)
			// TODO(schroederc): evict finished directories (based on GraphStore order)
		}

		if src == nil {
			src = e.Source
		} else if !compare.VNamesEqual(e.Source, src) {
			if err := writePartialEdges(ctx, out, assemble.SourceFromEntries(entries)); err != nil {
				drainEntries(gsEntries)
				return err
			}
			src = e.Source
			entries = nil
		}

		entries = append(entries, e)
	}
	if len(entries) > 0 {
		if err := writePartialEdges(ctx, out, assemble.SourceFromEntries(entries)); err != nil {
			return err
		}
	}

	if err := writeFileTree(ctx, tree, out.xs); err != nil {
		return fmt.Errorf("error writing file tree: %v", err)
	}
	tree = nil

	log.Println("Writing complete edges")
	snapshot := out.completeEdges.NewSnapshot()
	defer snapshot.Close()
	it, err := out.completeEdges.ScanPrefix(nil, &keyvalue.Options{
		LargeRead: true,
		Snapshot:  snapshot,
	})
	if err != nil {
		return err
	}
	defer it.Close()

	var n *srvpb.Node
	var e srvpb.Edge
	for {
		k, v, err := it.Next()
		if err == io.EOF {
			break
		} else if err != nil {
			return fmt.Errorf("error scanning partial edges table: %v", err)
		}

		ss := strings.Split(string(k), tempTableKeySep)
		if len(ss) != 3 {
			return fmt.Errorf("invalid partial edge table key: %q", string(k))
		}

		if err := proto.Unmarshal(v, &e); err != nil {
			return fmt.Errorf("invalid partial edge table value: %v", err)
		}

		if n == nil || n.Ticket != ss[0] {
			n = e.Source
		} else if e.Target != nil {
			e.Source = n
			if err := writeCompletedEdges(ctx, out.completeEdges, &e); err != nil {
				return fmt.Errorf("error writing complete edge: %v", err)
			}
		}
	}

	return nil
}