func main() {
	flag.Parse()

	if flag.NFlag() == 0 {
		fmt.Println("wc [-l] [-m] [-w] [-b] file [...fileN]")
		flag.PrintDefaults()
		os.Exit(1)
	}

	// default to stdin
	if flag.NArg() == 0 {
		c := wc.NewCounter(os.Stdin)
		err := c.Count(*multibytes, *bytes, *lines, *words)
		if err != nil {
			log.Fatal(err)
		}
		if *lines {
			fmt.Printf("% 10d ", c.Lines)
		}
		if *words {
			fmt.Printf("% 10d ", c.Words)
		}
		if *multibytes {
			fmt.Printf("% 10d ", c.Multibytes)
		}
		if *bytes {
			fmt.Printf("% 10d ", c.Bytes)
		}
		// terminate the report line (the file branch below ends its
		// lines with the file name instead)
		fmt.Println()
	} else {
		var multibytes_total, lines_total, words_total, bytes_total uint64
		for _, filepath := range flag.Args() {
			file, err := os.Open(filepath)
			if err != nil {
				log.Fatal(err)
			}
			c := wc.NewCounter(file)
			err = c.Count(*multibytes, *bytes, *lines, *words)
			if err != nil {
				log.Fatal(err)
			}
			file.Close()
			if *lines {
				lines_total += c.Lines
				fmt.Printf("% 10d ", c.Lines)
			}
			if *words {
				words_total += c.Words
				fmt.Printf("% 10d ", c.Words)
			}
			if *multibytes {
				multibytes_total += c.Multibytes
				fmt.Printf("% 10d ", c.Multibytes)
			}
			if *bytes {
				bytes_total += c.Bytes
				fmt.Printf("% 10d ", c.Bytes)
			}
			fmt.Printf("%s\n", filepath)
		}
		// print a totals row only when more than one file was counted
		if flag.NArg() > 1 {
			if *lines {
				fmt.Printf("% 10d ", lines_total)
			}
			if *words {
				fmt.Printf("% 10d ", words_total)
			}
			if *multibytes {
				fmt.Printf("% 10d ", multibytes_total)
			}
			if *bytes {
				fmt.Printf("% 10d ", bytes_total)
			}
			fmt.Print("total\n")
		}
	}
}
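The flag variables (*lines, *words, *multibytes, *bytes) and the wc package are declared outside this listing. As a rough guide to what main relies on, here is a minimal, runnable sketch of the Counter API implied by the calls above; the exported field names and the Count signature match the usage, but the counting logic itself is an assumption, not the original implementation:

package wc

import (
	"bufio"
	"io"
	"unicode/utf8"
)

// Counter tallies per-reader statistics into exported fields, matching the
// c.Lines / c.Words / c.Multibytes / c.Bytes reads in main above.
type Counter struct {
	r io.Reader

	Multibytes, Bytes, Lines, Words uint64
}

func NewCounter(r io.Reader) *Counter { return &Counter{r: r} }

// Count scans the reader once, updating only the requested tallies. The
// argument order mirrors the call sites: multibytes, bytes, lines, words.
// Assumption: "multibytes" means UTF-8 runes, in the spirit of wc -m.
func (c *Counter) Count(multibytes, bytes, lines, words bool) error {
	sc := bufio.NewScanner(c.r)
	for sc.Scan() {
		line := sc.Bytes()
		if lines {
			c.Lines++
		}
		if bytes {
			// approximate: re-adds the newline the scanner strips, and
			// assumes \n endings with a trailing newline
			c.Bytes += uint64(len(line)) + 1
		}
		if multibytes {
			c.Multibytes += uint64(utf8.RuneCount(line)) + 1
		}
		if words {
			inWord := false
			for _, b := range line {
				switch {
				case b == ' ' || b == '\t':
					inWord = false
				case !inWord:
					inWord = true
					c.Words++
				}
			}
		}
	}
	return sc.Err()
}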
func main() {
	flag.Parse()

	if !*intersection && !*diff && !*union {
		fmt.Println(`Usage: tt -[i,d,u] [-c] [-trim] [-match "regex"] [-capture "regex"] [-large [-estimated_lines N]] file1 file2[ file3..]`)
		flag.PrintDefaults()
		os.Exit(1)
	}

	start := time.Now()

	var stdout WriteFlusher
	if *devnull {
		stdout = new(DevNullWriter)
	} else {
		// buffered io
		stdout = bufio.NewWriterSize(os.Stdout, *buffer_size)
	}

	defer func() {
		stdout.Flush()
		fmt.Fprintln(os.Stderr, "** Token Report **")
		fmt.Fprintln(os.Stderr, "Lines scanned: ", total_lines_scanned)
		if *match_regex != "" {
			fmt.Fprintln(os.Stderr, "Lines matched: ", total_lines_matched)
		}
		fmt.Fprintln(os.Stderr, "Tokens emitted: ", total_tokens_emitted)
		fmt.Fprintln(os.Stderr, "Time: ", time.Since(start))
	}()

	file_paths := flag.Args()

	fmt.Fprintln(os.Stderr, "tt starting up")

	// if no estimate supplied, count lines
	if *large && *estimated_lines == 0 {
		var bytes_to_process uint64
		for _, file_path := range file_paths {
			file, err := os.Open(file_path)
			if err != nil {
				log.Fatal(err)
			}
			counter := wc.NewCounter(file)
			err = counter.Count(false, true, true, false)
			if err != nil {
				log.Fatal(err)
			}
			*estimated_lines += counter.Lines
			bytes_to_process += counter.Bytes
			file.Close()
		}
		fmt.Fprintln(os.Stderr, "Bytes to process: ", bytes_to_process)
		fmt.Fprintln(os.Stderr, "Lines to process: ", *estimated_lines)
	}

	if *large {

		if *union {
			unique_set := NewScalableBloom(*estimated_lines)
			for _, file_path := range file_paths {
				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}
				for e.Scan() {
					token := e.Bytes()
					if !unique_set.Check(token) {
						total_tokens_emitted++
						stdout.Write(token)
						stdout.WriteByte('\n')
						unique_set.Add(token)
					}
				}
				e.Close()
				total_lines_scanned += e.LinesScanned
			}
			return
		}

		// multi file handling below
		sets := make([]bloom.Bloom, len(file_paths))

		// may require throttling due to disk thrashing
		// initial scan to fill the bloom filters
		for i, file_path := range file_paths {
			set := NewScalableBloom(*estimated_lines)
			e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
			if err != nil {
				log.Fatal(err)
			}
			for e.Scan() {
				set.Add(e.Bytes())
			}
			e.Close()
			sets[i] = set
		}

		// do the work
		switch {

		// unique set of tokens that exist in all files
		case *intersection:
			echoed_set := NewScalableBloom(*estimated_lines)
			for _, file_path := range file_paths {
				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}
				// goto NEXT_TOKEN re-evaluates e.Scan(), acting as a labeled continue
			NEXT_TOKEN:
				for e.Scan() {
					token := e.Bytes()
					if echoed_set.Check(token) {
						goto NEXT_TOKEN
					}
					for _, set := range sets {
						if !set.Check(token) {
							goto NEXT_TOKEN
						}
					}
					total_tokens_emitted++
					stdout.Write(token)
					stdout.WriteByte('\n')
					echoed_set.Add(token)
				}
				total_lines_scanned += e.LinesScanned
				e.Close()
			}

		// unique set of tokens not in the intersection
		case *diff:
			echoed_set := NewScalableBloom(*estimated_lines)
			for _, file_path := range file_paths {
				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}
				for e.Scan() {
					token := e.Bytes()
					if echoed_set.Check(token) {
						continue
					}
					for _, set := range sets {
						if !set.Check(token) {
							total_tokens_emitted++
							stdout.Write(token)
							stdout.WriteByte('\n')
							echoed_set.Add(token)
							// break mirrors the map-based diff below; without it a
							// token missing from several sets is emitted once per set
							break
						}
					}
				}
				total_lines_scanned += e.LinesScanned
				e.Close()
			}
		}

		// defaults to map solution
	} else {

		if *union {
			unique_set := make(map[string]int)
			for _, file_path := range file_paths {
				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}
				for e.Scan() {
					unique_set[e.Text()]++
				}
				total_lines_scanned += e.LinesScanned
				e.Close()
			}
			if *count {
				for token, ct := range unique_set {
					total_tokens_emitted++
					fmt.Fprintf(stdout, "%d: %s\n", ct, token)
				}
			} else {
				for token := range unique_set {
					total_tokens_emitted++
					stdout.WriteString(token)
					stdout.WriteByte('\n')
				}
			}
			return
		}

		// multi file handling below
		sets := make([]map[string]bool, len(file_paths))

		// may require throttling due to disk thrashing
		// initial scan to fill the sets
		for i, file_path := range file_paths {
			set := make(map[string]bool)
			e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
			if err != nil {
				log.Fatal(err)
			}
			for e.Scan() {
				set[e.Text()] = true
			}
			e.Close()
			sets[i] = set
		}

		// do the work
		switch {

		// unique set of tokens that exist in all files
		case *intersection:
			echoed_set := make(map[string]bool)
			for _, file_path := range file_paths {
				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}
			NEXT_TOKEN2:
				for e.Scan() {
					token := e.Text()
					if _, echoed := echoed_set[token]; echoed {
						goto NEXT_TOKEN2
					}
					for _, set := range sets {
						if _, in_this_set := set[token]; !in_this_set {
							goto NEXT_TOKEN2
						}
					}
					total_tokens_emitted++
					stdout.WriteString(token)
					stdout.WriteByte('\n')
					echoed_set[token] = true
				}
				total_lines_scanned += e.LinesScanned
				e.Close()
			}

		// unique set of tokens not in the intersection
		case *diff:
			echoed_set := make(map[string]bool)
			for _, file_path := range file_paths {
				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}
				for e.Scan() {
					token := e.Text()
					if _, echoed := echoed_set[token]; echoed {
						continue
					}
					for _, set := range sets {
						if _, in_this_set := set[token]; !in_this_set {
							total_tokens_emitted++
							stdout.WriteString(token)
							stdout.WriteByte('\n')
							echoed_set[token] = true
							break
						}
					}
				}
				total_lines_scanned += e.LinesScanned
				e.Close()
			}
		}
	}
}
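One construct worth calling out: in both intersection cases, goto NEXT_TOKEN jumps back to a label attached to the for statement itself, which re-evaluates e.Scan() and so behaves exactly like a labeled continue. A self-contained illustration of the pattern:

package main

import "fmt"

func main() {
	i := 0
	// Jumping back to a label on a for statement re-runs the loop's
	// condition, so the goto below is equivalent to `continue NEXT`.
NEXT:
	for i < 5 {
		i++
		if i%2 == 0 {
			goto NEXT // skip even values
		}
		fmt.Println(i) // prints 1, 3, 5
	}
}

Given the flags in the usage string, a typical run of the bloom-filter path might look like `tt -i -large -estimated_lines 100000000 a.tok b.tok > common.tok` (illustrative invocation, not from the source).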