func main() {
	flag.Parse()
	if len(flag.Args()) != 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	written := make(map[[sha512.Size384]byte]struct{})
	var skipped uint64

	rd := delimited.NewReader(os.Stdin)
	wr := delimited.NewWriter(os.Stdout)
	for {
		rec, err := rd.Next()
		if err == io.EOF {
			break
		} else if err != nil {
			log.Fatal(err)
		}

		hash := sha512.Sum384(rec)
		if _, ok := written[hash]; ok {
			skipped++
			continue
		}
		if err := wr.Put(rec); err != nil {
			log.Fatal(err)
		}
		written[hash] = struct{}{}
	}
	log.Printf("dedup_stream: skipped %d records", skipped)
}
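// A minimal sketch (not part of the tool above) of consuming the stream that
// dedup_stream writes to stdout: each record is a length-delimited blob that
// can be read back with delimited.NewReader. readDeduped is an illustrative
// helper name.
func readDeduped(r io.Reader) ([][]byte, error) {
	rd := delimited.NewReader(r)
	var recs [][]byte
	for {
		rec, err := rd.Next()
		if err == io.EOF {
			return recs, nil
		} else if err != nil {
			return nil, err
		}
		// Copy the record before retaining it, assuming the reader may reuse
		// its internal buffer between calls to Next.
		recs = append(recs, append([]byte(nil), rec...))
	}
}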
func main() {
	flag.Parse()

	// done is sent a value when the analyzer should exit
	done := make(chan struct{}, 1)
	defer func() { done <- struct{}{} }()

	analyzerBin, analyzerArgs, compilations := parseAnalyzerCommand()
	if len(compilations) == 0 {
		flagutil.UsageError("Missing kindex-file paths")
	}

	cmd := exec.Command(analyzerBin, analyzerArgs...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	var proc *os.Process
	if err := process.StartAsync(cmd, &process.Callbacks{
		OnStart: func(p *os.Process) {
			log.Printf("Starting analyzer subprocess: %s", strings.Join(cmd.Args, " "))
			proc = p
		},
		OnExit: func(state *os.ProcessState, err error) {
			select {
			case <-done:
			default:
				log.Fatalf("Analyzer subprocess exited unexpectedly (state:%v; error:%v)", state, err)
			}
		},
	}); err != nil {
		log.Fatalf("Error starting analyzer: %v", err)
	}

	addr := fmt.Sprintf("localhost:%d", *analyzerPort)
	conn, err := grpc.Dial(addr, grpc.WithInsecure())
	if err != nil {
		log.Fatalf("Error dialing analyzer %q: %v", addr, err)
	}
	defer conn.Close()

	queue := local.NewKIndexQueue(compilations)
	fdsAddr := launchFileDataService(queue)

	wr := delimited.NewWriter(os.Stdout)

	driver := &driver.Driver{
		Analyzer: &remote.Analyzer{aspb.NewCompilationAnalyzerClient(conn)},
		Output: func(_ context.Context, out *apb.AnalysisOutput) error {
			return wr.Put(out.Value)
		},

		FileDataService: fdsAddr,
		Compilations:    queue,
	}

	if err := driver.Run(context.Background()); err != nil {
		log.Fatal(err)
	}

	if err := proc.Signal(os.Interrupt); err != nil {
		log.Fatalf("Failed to send interrupt to analyzer: %v", err)
	}
}
func testBuffer(entries []*spb.Entry) *bytes.Buffer {
	buf := bytes.NewBuffer(nil)
	wr := delimited.NewWriter(buf)
	for _, e := range entries {
		if err := wr.PutProto(e); err != nil {
			panic(err)
		}
	}
	return buf
}
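// A small sketch of how a test might decode the entries back out of the buffer
// produced by testBuffer, assuming the same delimited framing. readBack is an
// illustrative helper, not part of the original test code.
func readBack(buf *bytes.Buffer) ([]*spb.Entry, error) {
	rd := delimited.NewReader(buf)
	var entries []*spb.Entry
	for {
		rec, err := rd.Next()
		if err == io.EOF {
			return entries, nil
		} else if err != nil {
			return nil, err
		}
		var e spb.Entry
		if err := proto.Unmarshal(rec, &e); err != nil {
			return nil, err
		}
		entries = append(entries, &e)
	}
}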
func main() {
	flag.Parse()
	if len(flag.Args()) > 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	in := os.Stdin

	var entries <-chan *spb.Entry
	if *readJSON {
		entries = stream.ReadJSONEntries(in)
	} else {
		entries = stream.ReadEntries(in)
	}
	if *sortStream || *entrySets {
		entries = sortEntries(entries)
	}

	encoder := json.NewEncoder(os.Stdout)
	wr := delimited.NewWriter(os.Stdout)

	var set entrySet
	entryCount := 0
	for entry := range entries {
		if *countOnly {
			entryCount++
		} else if *entrySets {
			// Flush the current set whenever the (source, edge kind, target) group changes.
			if !compare.VNamesEqual(set.Source, entry.Source) || !compare.VNamesEqual(set.Target, entry.Target) || set.EdgeKind != entry.EdgeKind {
				if len(set.Properties) != 0 {
					failOnErr(encoder.Encode(set))
				}
				set.Source = entry.Source
				set.EdgeKind = entry.EdgeKind
				set.Target = entry.Target
				set.Properties = make(map[string]string)
			}
			set.Properties[entry.FactName] = string(entry.FactValue)
		} else if *writeJSON {
			failOnErr(encoder.Encode(entry))
		} else {
			rec, err := proto.Marshal(entry)
			failOnErr(err)
			failOnErr(wr.Put(rec))
		}
	}
	if len(set.Properties) != 0 {
		failOnErr(encoder.Encode(set))
	}

	if *countOnly {
		fmt.Println(entryCount)
	}
}
func main() {
	flag.Parse()
	if flag.NArg() != 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	rd, err := delimited.NewUniqReader(delimited.NewReader(os.Stdin), int(cacheSize.Bytes()))
	if err != nil {
		log.Fatalf("Error creating UniqReader: %v", err)
	}
	wr := delimited.NewWriter(os.Stdout)

	if err := delimited.Copy(wr, rd); err != nil {
		log.Fatal(err)
	}
	log.Printf("dedup_stream: skipped %d records", rd.Skipped())
}
func (m *mergeSorter) dumpShard() (err error) {
	defer func() {
		m.buffer = make([]interface{}, 0, m.opts.MaxInMemory)
	}()

	// Create a new shard file
	shardPath := filepath.Join(m.workDir, fmt.Sprintf("shard.%.6d", len(m.shards)))
	file, err := os.OpenFile(shardPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, shardFileMode)
	if err != nil {
		return fmt.Errorf("error creating shard: %v", err)
	}
	defer func() {
		replaceErrIfNil(&err, "error closing shard: %v", file.Close())
	}()

	w := io.Writer(file)
	if m.opts.CompressShards {
		w = snappy.NewWriter(w)
	}

	// Buffer writing to the shard
	buf := bufio.NewWriterSize(w, m.opts.IOBufferSize)
	defer func() {
		replaceErrIfNil(&err, "error flushing shard: %v", buf.Flush())
	}()

	// Sort the in-memory buffer of elements
	sortutil.Sort(m.opts.Lesser, m.buffer)

	// Write each element of the in-memory buffer to the shard file, in sorted order
	wr := delimited.NewWriter(buf)
	for len(m.buffer) > 0 {
		rec, err := m.opts.Marshaler.Marshal(m.buffer[0])
		if err != nil {
			return fmt.Errorf("marshaling error: %v", err)
		}
		if _, err := wr.Write(rec); err != nil {
			return fmt.Errorf("writing error: %v", err)
		}
		m.buffer = m.buffer[1:]
	}

	m.shards = append(m.shards, shardPath)
	return nil
}
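// A rough sketch of how a single shard written by dumpShard could be read back,
// assuming the same layering (optional snappy compression underneath a stream
// of length-delimited records). readShard and the unmarshal callback are
// illustrative assumptions; the sorter's real merge phase is not shown here.
func readShard(path string, compressed bool, unmarshal func([]byte) (interface{}, error)) ([]interface{}, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	r := io.Reader(f)
	if compressed {
		r = snappy.NewReader(r)
	}

	rd := delimited.NewReader(bufio.NewReader(r))
	var elems []interface{}
	for {
		rec, err := rd.Next()
		if err == io.EOF {
			return elems, nil
		} else if err != nil {
			return nil, err
		}
		el, err := unmarshal(rec)
		if err != nil {
			return nil, err
		}
		elems = append(elems, el)
	}
}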
// WriteTo implements the io.WriterTo interface, writing the contents of the
// Compilation in index file format. Returns the total number of bytes written
// after GZip compression was applied.
func (c *Compilation) WriteTo(w io.Writer) (int64, error) {
	gz, err := gzip.NewWriterLevel(w, gzip.BestCompression)
	if err != nil {
		return 0, err
	}
	w = delimited.NewWriter(gz)

	buf := proto.NewBuffer(nil)
	if err := buf.Marshal(c.Proto); err != nil {
		gz.Close()
		return 0, fmt.Errorf("marshaling compilation: %v", err)
	}

	var total int64
	nw, err := w.Write(buf.Bytes())
	total += int64(nw)
	if err != nil {
		gz.Close()
		return total, fmt.Errorf("writing compilation: %v", err)
	}

	for _, file := range c.Files {
		buf.Reset()
		if err := buf.Marshal(file); err != nil {
			gz.Close()
			return total, fmt.Errorf("marshaling file data: %v", err)
		}
		nw, err := w.Write(buf.Bytes())
		total += int64(nw)
		if err != nil {
			gz.Close()
			return total, fmt.Errorf("writing file data: %v", err)
		}
	}

	if err := gz.Close(); err != nil {
		return total, err
	}
	return total, nil
}
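// A hedged sketch of the inverse of WriteTo above: reading back the index file
// format it produces (a gzip stream of length-delimited records, where the
// first record is the serialized compilation proto and each following record
// is one file's serialized data). readIndexFile and its raw-bytes return
// values are illustrative assumptions, not the package's API.
func readIndexFile(r io.Reader) (unit []byte, files [][]byte, err error) {
	gz, err := gzip.NewReader(r)
	if err != nil {
		return nil, nil, err
	}
	defer gz.Close()

	rd := delimited.NewReader(gz)

	// First record: the compilation proto.
	rec, err := rd.Next()
	if err != nil {
		return nil, nil, err
	}
	unit = append([]byte(nil), rec...)

	// Remaining records: per-file data, left unparsed here.
	for {
		rec, err := rd.Next()
		if err == io.EOF {
			return unit, files, nil
		} else if err != nil {
			return nil, nil, err
		}
		files = append(files, append([]byte(nil), rec...))
	}
}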
func main() {
	flag.Parse()
	if len(flag.Args()) > 0 {
		flagutil.UsageErrorf("unknown arguments: %v", flag.Args())
	}

	in := bufio.NewReaderSize(os.Stdin, 2*4096)
	out := bufio.NewWriter(os.Stdout)

	var rd stream.EntryReader
	if *readJSON {
		rd = stream.NewJSONReader(in)
	} else {
		rd = stream.NewReader(in)
	}

	if *sortStream || *entrySets || *uniqEntries {
		var err error
		rd, err = sortEntries(rd)
		failOnErr(err)
	}
	if *uniqEntries {
		rd = dedupEntries(rd)
	}

	switch {
	case *countOnly:
		var count int
		failOnErr(rd(func(_ *spb.Entry) error {
			count++
			return nil
		}))
		fmt.Println(count)
	case *entrySets:
		encoder := json.NewEncoder(out)
		var set entrySet
		failOnErr(rd(func(entry *spb.Entry) error {
			if !compare.VNamesEqual(set.Source, entry.Source) || !compare.VNamesEqual(set.Target, entry.Target) || set.EdgeKind != entry.EdgeKind {
				if len(set.Properties) != 0 {
					if err := encoder.Encode(set); err != nil {
						return err
					}
				}
				set.Source = entry.Source
				set.EdgeKind = entry.EdgeKind
				set.Target = entry.Target
				set.Properties = make(map[string]string)
			}
			set.Properties[entry.FactName] = string(entry.FactValue)
			return nil
		}))
		if len(set.Properties) != 0 {
			failOnErr(encoder.Encode(set))
		}
	case *writeJSON:
		encoder := json.NewEncoder(out)
		failOnErr(rd(func(entry *spb.Entry) error {
			return encoder.Encode(entry)
		}))
	default:
		wr := delimited.NewWriter(out)
		failOnErr(rd(func(entry *spb.Entry) error {
			rec, err := proto.Marshal(entry)
			if err != nil {
				return err
			}
			return wr.Put(rec)
		}))
	}
	failOnErr(out.Flush())
}
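// An illustrative sketch of driving a stream.EntryReader directly, relying on
// the callback shape used above (the reader is invoked with a function that
// receives each decoded *spb.Entry). countEntries is a hypothetical helper,
// not part of the tool.
func countEntries(rd stream.EntryReader) (int, error) {
	var count int
	err := rd(func(_ *spb.Entry) error {
		count++
		return nil
	})
	return count, err
}

// Example usage: n, err := countEntries(stream.NewReader(os.Stdin))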
var (
	vnamesConfigPath = flag.String("vnames", "", "Path to JSON VNames configuration")
	exclude          = flag.String("exclude", "", "Comma-separated list of exclude regexp patterns")
	verbose          = flag.Bool("verbose", false, "Print verbose logging")
	emitIrregular    = flag.Bool("emit_irregular", false, "Emit nodes for irregular files")
)

var (
	kindLabel = "/kythe/node/kind"
	textLabel = "/kythe/text"

	fileKind = []byte("file")
)

var w = delimited.NewWriter(os.Stdout)

func emitEntry(v *spb.VName, label string, value []byte) error {
	return w.PutProto(&spb.Entry{Source: v, FactName: label, FactValue: value})
}

var (
	fileRules vnameutil.Rules
	excludes  []*regexp.Regexp
)

func emitPath(path string, info os.FileInfo, err error) error {
	if info.IsDir() || !(*emitIrregular || info.Mode().IsRegular()) {
		return nil
	}
	for _, re := range excludes {
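// A hedged sketch of how emitEntry might be used for a file that passes the
// filters in emitPath: emit the node-kind and text facts for the file's VName.
// emitFileNode and the contents parameter are illustrative, not original code.
func emitFileNode(v *spb.VName, contents []byte) error {
	if err := emitEntry(v, kindLabel, fileKind); err != nil {
		return err
	}
	return emitEntry(v, textLabel, contents)
}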
func main() {
	flag.Parse()

	if gs == nil {
		flagutil.UsageError("missing --graphstore")
	} else if *shardsToFiles != "" && *shards <= 0 {
		flagutil.UsageError("--sharded_file and --shards must be given together")
	} else if *shards > 0 && len(flag.Args()) > 0 {
		flagutil.UsageError("--shards and giving tickets for reads are mutually exclusive")
	}

	ctx := context.Background()

	wr := delimited.NewWriter(os.Stdout)
	var total int64
	if *shards <= 0 {
		entryFunc := func(entry *spb.Entry) error {
			if *count {
				total++
				return nil
			}
			return wr.PutProto(entry)
		}
		if len(flag.Args()) > 0 {
			if *targetTicket != "" || *factPrefix != "" {
				log.Fatal("--target and --fact_prefix are unsupported when given tickets")
			}
			if err := readEntries(ctx, gs, entryFunc, *edgeKind, flag.Args()); err != nil {
				log.Fatal(err)
			}
		} else {
			if err := scanEntries(ctx, gs, entryFunc, *edgeKind, *targetTicket, *factPrefix); err != nil {
				log.Fatal(err)
			}
		}
		if *count {
			fmt.Println(total)
		}
		return
	}

	sgs, ok := gs.(graphstore.Sharded)
	if !ok {
		log.Fatalf("Sharding unsupported for given GraphStore type: %T", gs)
	} else if *shardIndex >= *shards {
		log.Fatalf("Invalid shard index for %d shards: %d", *shards, *shardIndex)
	}

	if *count {
		cnt, err := sgs.Count(ctx, &spb.CountRequest{Index: *shardIndex, Shards: *shards})
		if err != nil {
			log.Fatalf("ERROR: %v", err)
		}
		fmt.Println(cnt)
		return
	} else if *shardsToFiles != "" {
		var wg sync.WaitGroup
		wg.Add(int(*shards))
		for i := int64(0); i < *shards; i++ {
			go func(i int64) {
				defer wg.Done()
				path := fmt.Sprintf("%s-%.5d-of-%.5d", *shardsToFiles, i, *shards)
				f, err := vfs.Create(ctx, path)
				if err != nil {
					log.Fatalf("Failed to create file %q: %v", path, err)
				}
				defer f.Close()
				wr := delimited.NewWriter(f)
				if err := sgs.Shard(ctx, &spb.ShardRequest{
					Index:  i,
					Shards: *shards,
				}, func(entry *spb.Entry) error {
					return wr.PutProto(entry)
				}); err != nil {
					log.Fatalf("GraphStore shard scan error: %v", err)
				}
			}(i)
		}
		wg.Wait()
		return
	}

	if err := sgs.Shard(ctx, &spb.ShardRequest{
		Index:  *shardIndex,
		Shards: *shards,
	}, func(entry *spb.Entry) error {
		return wr.PutProto(entry)
	}); err != nil {
		log.Fatalf("GraphStore shard scan error: %v", err)
	}
}