Beispiel #1
0
// main reads a stream of length-delimited records from stdin and copies it to
// stdout, dropping any record whose contents have already been emitted.
// Records are identified by their SHA-384 digest.  The number of duplicates
// dropped is logged on exit.
func main() {
	flag.Parse()
	if len(flag.Args()) != 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	// seen holds the SHA-384 digest of every record emitted so far.
	// Using the fixed-size array digest as the map key avoids allocating a
	// string per record.
	seen := make(map[[sha512.Size384]byte]struct{})

	var dropped uint64
	in := delimited.NewReader(os.Stdin)
	out := delimited.NewWriter(os.Stdout)
	for {
		rec, err := in.Next()
		if err != nil {
			if err == io.EOF {
				break
			}
			log.Fatal(err)
		}

		digest := sha512.Sum384(rec)
		if _, dup := seen[digest]; dup {
			dropped++
			continue
		}
		seen[digest] = struct{}{}
		if err := out.Put(rec); err != nil {
			log.Fatal(err)
		}
	}
	log.Printf("dedup_stream: skipped %d records", dropped)
}
Beispiel #2
0
// main launches an analyzer subprocess, dials it over gRPC, and drives it over
// the compilations named on the command line, writing each analysis output as
// a length-delimited record on stdout.  When the driver finishes, the analyzer
// is asked to shut down with an interrupt signal.
func main() {
	flag.Parse()

	// done is sent a value when the analyzer should exit
	done := make(chan struct{}, 1)
	// The deferred send marks the analyzer's exit as expected so OnExit below
	// does not treat it as a crash.
	defer func() { done <- struct{}{} }()

	analyzerBin, analyzerArgs, compilations := parseAnalyzerCommand()
	if len(compilations) == 0 {
		flagutil.UsageError("Missing kindex-file paths")
	}

	// Start the analyzer subprocess asynchronously; its stdout/stderr are
	// passed through to ours.
	cmd := exec.Command(analyzerBin, analyzerArgs...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	var proc *os.Process
	if err := process.StartAsync(cmd, &process.Callbacks{
		OnStart: func(p *os.Process) {
			log.Printf("Starting analyzer subprocess: %s", strings.Join(cmd.Args, " "))
			// Capture the process handle so we can signal it after the run.
			proc = p
		},
		OnExit: func(state *os.ProcessState, err error) {
			// A value on done means the exit was requested; otherwise the
			// subprocess died on its own and we abort.
			// NOTE(review): the expected-exit send happens in main's defer,
			// which runs only after proc.Signal below — if the subprocess
			// exits before main returns, this default branch can still
			// Fatalf on an expected shutdown.  Looks racy; confirm intended.
			select {
			case <-done:
			default:
				log.Fatalf("Analyzer subprocess exited unexpectedly (state:%v; error:%v)", state, err)
			}
		},
	}); err != nil {
		log.Fatalf("Error starting analyzer: %v", err)
	}

	// Dial the analyzer's CompilationAnalyzer service on the configured port.
	addr := fmt.Sprintf("localhost:%d", *analyzerPort)
	conn, err := grpc.Dial(addr, grpc.WithInsecure())
	if err != nil {
		log.Fatalf("Error dialing analyzer %q: %v", addr, err)
	}
	defer conn.Close()

	// Queue the kindex files and expose their file data over a local service
	// the analyzer can fetch from.
	queue := local.NewKIndexQueue(compilations)
	fdsAddr := launchFileDataService(queue)

	wr := delimited.NewWriter(os.Stdout)

	driver := &driver.Driver{
		Analyzer: &remote.Analyzer{aspb.NewCompilationAnalyzerClient(conn)},
		// Each analysis output's raw value is written as one delimited record.
		Output:   func(_ context.Context, out *apb.AnalysisOutput) error { return wr.Put(out.Value) },

		FileDataService: fdsAddr,
		Compilations:    queue,
	}

	if err := driver.Run(context.Background()); err != nil {
		log.Fatal(err)
	}

	// Ask the analyzer to shut down now that all compilations are processed.
	if err := proc.Signal(os.Interrupt); err != nil {
		log.Fatalf("Failed to send interrupt to analyzer: %v", err)
	}
}
Beispiel #3
0
// testBuffer encodes entries as a length-delimited proto stream and returns
// the backing buffer.  It panics on any encoding error (test-only helper).
func testBuffer(entries []*spb.Entry) *bytes.Buffer {
	var buf bytes.Buffer
	w := delimited.NewWriter(&buf)
	for _, entry := range entries {
		if err := w.PutProto(entry); err != nil {
			panic(err)
		}
	}
	return &buf
}
Beispiel #4
0
// main reads a stream of Entry protos from stdin and, depending on flags,
// counts them, groups them into entry sets, or re-encodes them as JSON or
// length-delimited protos on stdout.
func main() {
	flag.Parse()
	if len(flag.Args()) > 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	// Select the input decoding for the entry stream.
	in := os.Stdin
	var entries <-chan *spb.Entry
	if *readJSON {
		entries = stream.ReadJSONEntries(in)
	} else {
		entries = stream.ReadEntries(in)
	}
	// --entrysets implies sorting so that entries sharing a
	// (source, edge kind, target) key arrive adjacently and can be grouped
	// in a single pass.
	if *sortStream || *entrySets {
		entries = sortEntries(entries)
	}

	encoder := json.NewEncoder(os.Stdout)
	wr := delimited.NewWriter(os.Outsafe)

	var set entrySet
	entryCount := 0
	for entry := range entries {
		if *countOnly {
			entryCount++
		} else if *entrySets {
			// Start a new set whenever any component of the grouping key
			// changes.  BUG FIX: the source comparison was missing its
			// negation, so a new set was started exactly when the sources
			// matched — merging facts from unrelated sources and splitting
			// sets that belonged together.
			if !compare.VNamesEqual(set.Source, entry.Source) || !compare.VNamesEqual(set.Target, entry.Target) || set.EdgeKind != entry.EdgeKind {
				// Flush the completed set, if any, before starting the next.
				if len(set.Properties) != 0 {
					failOnErr(encoder.Encode(set))
				}
				set.Source = entry.Source
				set.EdgeKind = entry.EdgeKind
				set.Target = entry.Target
				set.Properties = make(map[string]string)
			}
			set.Properties[entry.FactName] = string(entry.FactValue)
		} else if *writeJSON {
			failOnErr(encoder.Encode(entry))
		} else {
			rec, err := proto.Marshal(entry)
			failOnErr(err)
			failOnErr(wr.Put(rec))
		}
	}
	// Flush the final accumulated entry set, if any.
	if len(set.Properties) != 0 {
		failOnErr(encoder.Encode(set))
	}
	if *countOnly {
		fmt.Println(entryCount)
	}
}
Beispiel #5
0
// main copies the length-delimited record stream from stdin to stdout,
// suppressing duplicate records with a bounded-memory UniqReader cache, and
// logs how many duplicates were skipped.
func main() {
	flag.Parse()
	if flag.NArg() != 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	// Wrap stdin in a deduplicating reader backed by a cache of the
	// configured byte size.
	uniq, err := delimited.NewUniqReader(delimited.NewReader(os.Stdin), int(cacheSize.Bytes()))
	if err != nil {
		log.Fatalf("Error creating UniqReader: %v", err)
	}
	if err := delimited.Copy(delimited.NewWriter(os.Stdout), uniq); err != nil {
		log.Fatal(err)
	}
	log.Printf("dedup_stream: skipped %d records", uniq.Skipped())
}
Beispiel #6
0
// dumpShard sorts the in-memory buffer of elements and writes them, as
// length-delimited marshaled records, to a new shard file in the sorter's
// working directory.  The buffer is reset (capacity preserved via a fresh
// allocation) whether or not the dump succeeds.  The named return err lets
// the deferred close/flush handlers report their failures.
func (m *mergeSorter) dumpShard() (err error) {
	defer func() {
		// Always start the next batch with an empty buffer of the configured
		// capacity, even on error.
		m.buffer = make([]interface{}, 0, m.opts.MaxInMemory)
	}()

	// Create a new shard file
	// O_EXCL guarantees we never silently overwrite an existing shard.
	shardPath := filepath.Join(m.workDir, fmt.Sprintf("shard.%.6d", len(m.shards)))
	file, err := os.OpenFile(shardPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, shardFileMode)
	if err != nil {
		return fmt.Errorf("error creating shard: %v", err)
	}
	defer func() {
		// Only overwrites err if it is currently nil, so an earlier failure
		// is not masked by a close error.
		replaceErrIfNil(&err, "error closing shard: %v", file.Close())
	}()

	w := io.Writer(file)
	if m.opts.CompressShards {
		w = snappy.NewWriter(w)
	}

	// Buffer writing to the shard
	// Deferred after file.Close above, so (LIFO) the flush runs BEFORE the
	// file is closed — the ordering these two defers rely on.
	buf := bufio.NewWriterSize(w, m.opts.IOBufferSize)
	defer func() {
		replaceErrIfNil(&err, "error flushing shard: %v", buf.Flush())
	}()

	// Sort the in-memory buffer of elements
	sortutil.Sort(m.opts.Lesser, m.buffer)

	// Write each element of the in-memory to shard file, in sorted order
	// The head of the slice is dropped after each write so written elements
	// become unreachable as the loop advances.
	wr := delimited.NewWriter(buf)
	for len(m.buffer) > 0 {
		rec, err := m.opts.Marshaler.Marshal(m.buffer[0])
		if err != nil {
			return fmt.Errorf("marshaling error: %v", err)
		}
		// NOTE(review): Write is used here where other callers use Put;
		// presumably the delimited writer's Write emits one framed record —
		// confirm they are equivalent.
		if _, err := wr.Write(rec); err != nil {
			return fmt.Errorf("writing error: %v", err)
		}
		m.buffer = m.buffer[1:]
	}

	m.shards = append(m.shards, shardPath)
	return nil
}
Beispiel #7
0
// WriteTo implements the io.WriterTo interface, writing the contents of the
// Compilation in index file format.  Returns the total number of bytes written
// after GZip compression was applied.
func (c *Compilation) WriteTo(w io.Writer) (int64, error) {
	gz, err := gzip.NewWriterLevel(w, gzip.BestCompression)
	if err != nil {
		return 0, err
	}
	w = delimited.NewWriter(gz)

	buf := proto.NewBuffer(nil)
	if err := buf.Marshal(c.Proto); err != nil {
		gz.Close()
		return 0, fmt.Errorf("marshalling compilation: %v", err)
	}

	var total int64

	nw, err := w.Write(buf.Bytes())
	total += int64(nw)
	if err != nil {
		gz.Close()
		return total, fmt.Errorf("writing compilation: %v", err)
	}

	for _, file := range c.Files {
		buf.Reset()
		if err := buf.Marshal(file); err != nil {
			gz.Close()
			return total, fmt.Errorf("marshaling file data: %v", err)
		}
		nw, err := w.Write(buf.Bytes())
		total += int64(nw)
		if err != nil {
			gz.Close()
			return total, fmt.Errorf("writing file data: %v", err)
		}
	}
	if err := gz.Close(); err != nil {
		return total, err
	}
	return total, nil
}
Beispiel #8
0
// main reads an Entry stream from stdin (proto or JSON), optionally sorts and
// deduplicates it, then emits it in one of four modes: a count, grouped entry
// sets as JSON, one JSON object per entry, or length-delimited protos.
func main() {
	flag.Parse()
	if len(flag.Args()) > 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	// Buffer both ends of the pipeline to avoid per-record syscalls.
	in := bufio.NewReaderSize(os.Stdin, 2*4096)
	out := bufio.NewWriter(os.Stdout)

	// Choose the input decoder based on --read_json.
	var rd stream.EntryReader
	if *readJSON {
		rd = stream.NewJSONReader(in)
	} else {
		rd = stream.NewReader(in)
	}

	// Entry-set grouping and deduplication both require a sorted stream.
	if *sortStream || *entrySets || *uniqEntries {
		var err error
		rd, err = sortEntries(rd)
		failOnErr(err)
	}

	if *uniqEntries {
		rd = dedupEntries(rd)
	}

	switch {
	case *countOnly:
		// Consume the stream, emitting only the total entry count.
		var count int
		failOnErr(rd(func(_ *spb.Entry) error {
			count++
			return nil
		}))
		fmt.Println(count)
	case *entrySets:
		// Group adjacent entries sharing a (source, edge kind, target) key
		// into entrySet values; set carries state across callback calls.
		encoder := json.NewEncoder(out)
		var set entrySet
		failOnErr(rd(func(entry *spb.Entry) error {
			// A change in any key component closes out the current set.
			if !compare.VNamesEqual(set.Source, entry.Source) || !compare.VNamesEqual(set.Target, entry.Target) || set.EdgeKind != entry.EdgeKind {
				if len(set.Properties) != 0 {
					if err := encoder.Encode(set); err != nil {
						return err
					}
				}
				set.Source = entry.Source
				set.EdgeKind = entry.EdgeKind
				set.Target = entry.Target
				set.Properties = make(map[string]string)
			}
			set.Properties[entry.FactName] = string(entry.FactValue)
			return nil
		}))
		// Flush the final set accumulated by the callback above.
		if len(set.Properties) != 0 {
			failOnErr(encoder.Encode(set))
		}
	case *writeJSON:
		encoder := json.NewEncoder(out)
		failOnErr(rd(func(entry *spb.Entry) error {
			return encoder.Encode(entry)
		}))
	default:
		// Default mode: re-emit each entry as a length-delimited proto.
		wr := delimited.NewWriter(out)
		failOnErr(rd(func(entry *spb.Entry) error {
			rec, err := proto.Marshal(entry)
			if err != nil {
				return err
			}
			return wr.Put(rec)
		}))
	}
	// Push any buffered output before exit.
	failOnErr(out.Flush())
}
Beispiel #9
0
// Command-line flags controlling which files are walked and what is emitted.
var (
	vnamesConfigPath = flag.String("vnames", "", "Path to JSON VNames configuration")
	exclude          = flag.String("exclude", "", "Comma-separated list of exclude regexp patterns")
	verbose          = flag.Bool("verbose", false, "Print verbose logging")
	emitIrregular    = flag.Bool("emit_irregular", false, "Emit nodes for irregular files")
)

// Kythe fact names and values used for the file nodes this tool emits.
var (
	kindLabel = "/kythe/node/kind"
	textLabel = "/kythe/text"

	fileKind = []byte("file")
)

// w is the shared length-delimited writer all entries are emitted through.
var w = delimited.NewWriter(os.Stdout)

// emitEntry writes a single fact entry for the VName v to the shared stdout
// writer, using label as the fact name and value as the fact value.
func emitEntry(v *spb.VName, label string, value []byte) error {
	entry := &spb.Entry{
		Source:    v,
		FactName:  label,
		FactValue: value,
	}
	return w.PutProto(entry)
}

// Walk configuration derived from the flags above: VName-assignment rules and
// compiled exclusion patterns.
var (
	fileRules vnameutil.Rules
	excludes  []*regexp.Regexp
)

func emitPath(path string, info os.FileInfo, err error) error {
	if info.IsDir() || !(*emitIrregular || info.Mode().IsRegular()) {
		return nil
	}
	for _, re := range excludes {
Beispiel #10
0
// main reads entries from a GraphStore and writes them as length-delimited
// protos on stdout, or — with --shards — counts/scans a single shard or fans
// every shard out to its own file.
func main() {
	flag.Parse()
	// Validate mutually-dependent flag combinations up front.
	if gs == nil {
		flagutil.UsageError("missing --graphstore")
	} else if *shardsToFiles != "" && *shards <= 0 {
		flagutil.UsageError("--sharded_file and --shards must be given together")
	} else if *shards > 0 && len(flag.Args()) > 0 {
		flagutil.UsageError("--shards and giving tickets for reads are mutually exclusive")
	}

	ctx := context.Background()

	wr := delimited.NewWriter(os.Stdout)
	var total int64
	// Unsharded path: read by ticket or scan the whole store.
	if *shards <= 0 {
		// With --count, entries are tallied instead of emitted.
		entryFunc := func(entry *spb.Entry) error {
			if *count {
				total++
				return nil
			}
			return wr.PutProto(entry)
		}
		if len(flag.Args()) > 0 {
			if *targetTicket != "" || *factPrefix != "" {
				log.Fatal("--target and --fact_prefix are unsupported when given tickets")
			}
			if err := readEntries(ctx, gs, entryFunc, *edgeKind, flag.Args()); err != nil {
				log.Fatal(err)
			}
		} else {
			if err := scanEntries(ctx, gs, entryFunc, *edgeKind, *targetTicket, *factPrefix); err != nil {
				log.Fatal(err)
			}
		}
		if *count {
			fmt.Println(total)
		}
		return
	}

	// Sharded paths require the store to implement graphstore.Sharded.
	sgs, ok := gs.(graphstore.Sharded)
	if !ok {
		log.Fatalf("Sharding unsupported for given GraphStore type: %T", gs)
	} else if *shardIndex >= *shards {
		log.Fatalf("Invalid shard index for %d shards: %d", *shards, *shardIndex)
	}

	if *count {
		// Count a single shard without reading its entries.
		cnt, err := sgs.Count(ctx, &spb.CountRequest{Index: *shardIndex, Shards: *shards})
		if err != nil {
			log.Fatalf("ERROR: %v", err)
		}
		fmt.Println(cnt)
		return
	} else if *shardsToFiles != "" {
		// Dump every shard concurrently, one output file per shard.
		var wg sync.WaitGroup
		wg.Add(int(*shards))
		for i := int64(0); i < *shards; i++ {
			// The shard index is passed as an argument so each goroutine
			// captures its own value.
			go func(i int64) {
				defer wg.Done()
				path := fmt.Sprintf("%s-%.5d-of-%.5d", *shardsToFiles, i, *shards)
				f, err := vfs.Create(ctx, path)
				if err != nil {
					log.Fatalf("Failed to create file %q: %v", path, err)
				}
				// NOTE(review): the close error is ignored, and log.Fatalf
				// in this goroutine skips the deferred Close entirely —
				// acceptable for a fatal exit, but confirm.
				defer f.Close()
				wr := delimited.NewWriter(f)
				if err := sgs.Shard(ctx, &spb.ShardRequest{
					Index:  i,
					Shards: *shards,
				}, func(entry *spb.Entry) error {
					return wr.PutProto(entry)
				}); err != nil {
					log.Fatalf("GraphStore shard scan error: %v", err)
				}
			}(i)
		}
		wg.Wait()
		return
	}

	// Default sharded path: stream the single selected shard to stdout.
	if err := sgs.Shard(ctx, &spb.ShardRequest{
		Index:  *shardIndex,
		Shards: *shards,
	}, func(entry *spb.Entry) error {
		return wr.PutProto(entry)
	}); err != nil {
		log.Fatalf("GraphStore shard scan error: %v", err)
	}
}