Example #1
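Only main is shown; the package clause, imports, and flag definitions live elsewhere in the file. Below is a minimal sketch of the assumed preamble — the flag names are inferred from the usage string printed in main, and the wc import path is a placeholder, not the real one:

package main

import (
	"flag"
	"fmt"
	"log"
	"os"

	"path/to/wc" // placeholder: the actual import path is not shown on this page
)

// Flag names are assumptions inferred from the usage string below.
var (
	lines      = flag.Bool("l", false, "count lines")
	words      = flag.Bool("w", false, "count words")
	multibytes = flag.Bool("m", false, "count multibyte characters")
	bytes      = flag.Bool("b", false, "count bytes")
)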
func main() {

	flag.Parse()

	if flag.NFlag() == 0 {
		fmt.Println("wc [-l] [-m] [-w] [-b] file [...fileN]")
		flag.PrintDefaults()
		os.Exit(1)
	}

	// default to stdin
	if flag.NArg() == 0 {
		c := wc.NewCounter(os.Stdin)

		err := c.Count(*multibytes, *bytes, *lines, *words)
		if err != nil {
			log.Fatal(err)
		}

		if *lines {
			fmt.Printf("% 10d ", c.Lines)
		}
		if *words {
			fmt.Printf("% 10d ", c.Words)
		}
		if *multibytes {
			fmt.Printf("% 10d ", c.Multibytes)
		}
		if *bytes {
			fmt.Printf("% 10d ", c.Bytes)
		}

		// terminate the stdin row with a newline, matching the per-file output
		fmt.Println()
	} else {
		var multibytes_total, lines_total, words_total, bytes_total uint64

		for _, filepath := range flag.Args() {

			file, err := os.Open(filepath)
			if err != nil {
				log.Fatal(err)
			}

			c := wc.NewCounter(file)

			err = c.Count(*multibytes, *bytes, *lines, *words)
			if err != nil {
				log.Fatal(err)
			}

			file.Close()

			if *lines {
				lines_total += c.Lines
				fmt.Printf("% 10d ", c.Lines)
			}
			if *words {
				words_total += c.Words
				fmt.Printf("% 10d ", c.Words)
			}
			if *multibytes {
				multibytes_total += c.Multibytes
				fmt.Printf("% 10d ", c.Multibytes)
			}
			if *bytes {
				bytes_total += c.Bytes
				fmt.Printf("% 10d ", c.Bytes)
			}

			fmt.Printf("%s\n", filepath)
		}

		if flag.NArg() > 1 {

			if *lines {
				fmt.Printf("% 10d ", lines_total)
			}
			if *words {
				fmt.Printf("% 10d ", words_total)
			}
			if *multibytes {
				fmt.Printf("% 10d ", multibytes_total)
			}
			if *bytes {
				fmt.Printf("% 10d ", bytes_total)
			}

			fmt.Print("total\n")

		}
	}

}
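Assuming the flags sketched above, a run over two files prints one row per file plus a trailing "total" row, for example:

	$ wc -l -w file1.txt file2.txt

Each selected counter is printed right-aligned in a ten-character column, which is what the "% 10d " format strings produce.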
Example #2
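As with the first example, only main is shown; the program also relies on package-level declarations that do not appear on this page. The sketch below reconstructs a minimal, assumed version of them — every flag name, default, type, and import path is inferred from how main uses it, not taken from the original source. NewEmitter, NewScalableBloom, and the -trim flag mentioned in the usage string are defined elsewhere in the package and are not sketched here:

package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"time"

	"path/to/bloom" // placeholder: actual import paths are not shown
	"path/to/wc"    // placeholder
)

// WriteFlusher abstracts the output sink; both *bufio.Writer and
// *DevNullWriter satisfy it.
type WriteFlusher interface {
	io.Writer
	WriteByte(c byte) error
	WriteString(s string) (int, error)
	Flush() error
}

// DevNullWriter discards everything written to it (-devnull, for benchmarking).
type DevNullWriter struct{}

func (*DevNullWriter) Write(p []byte) (int, error)       { return len(p), nil }
func (*DevNullWriter) WriteByte(c byte) error            { return nil }
func (*DevNullWriter) WriteString(s string) (int, error) { return len(s), nil }
func (*DevNullWriter) Flush() error                      { return nil }

// Flag names, defaults, and help strings are assumptions inferred from the
// usage string printed in main.
var (
	intersection    = flag.Bool("i", false, "emit tokens present in every file")
	diff            = flag.Bool("d", false, "emit tokens missing from at least one file")
	union           = flag.Bool("u", false, "emit each distinct token once")
	count           = flag.Bool("c", false, "with -u, prefix each token with its count")
	match_regex     = flag.String("match", "", "only scan lines matching this regex")
	capture_regex   = flag.String("capture", "", "emit the regex capture instead of the whole line")
	large           = flag.Bool("large", false, "use scalable Bloom filters instead of maps")
	estimated_lines = flag.Uint64("estimated_lines", 0, "line estimate used to size the Bloom filters")
	devnull         = flag.Bool("devnull", false, "discard output (for benchmarking)")
	buffer_size     = flag.Int("buffer_size", 1<<16, "output buffer size in bytes")
)

// Run totals; total_lines_matched is presumably updated inside the Emitter,
// since main never increments it.
var total_lines_scanned, total_lines_matched, total_tokens_emitted uint64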
func main() {

	flag.Parse()

	if !*intersection && !*diff && !*union {
		fmt.Println(`Usage: tt -[i,d,u] [-c] [-trim] [-match "regex"] [-capture "regex"] [-large [-estimated_lines N]] file1 file2 [file3 ...]`)
		flag.PrintDefaults()
		os.Exit(1)
	}

	start := time.Now()

	var stdout WriteFlusher

	if *devnull {
		stdout = new(DevNullWriter)
	} else {
		// buffered io
		stdout = bufio.NewWriterSize(os.Stdout, *buffer_size)
	}

	defer func() {
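		// flush buffered output, then report run statistics to stderr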
		stdout.Flush()
		fmt.Fprintln(os.Stderr, "** Token Report **")
		fmt.Fprintln(os.Stderr, "Lines scanned: ", total_lines_scanned)
		if *match_regex != "" {
			fmt.Fprintln(os.Stderr, "Lines matched: ", total_lines_matched)
		}
		fmt.Fprintln(os.Stderr, "Tokens emitted: ", total_tokens_emitted)
		fmt.Fprintln(os.Stderr, "Time: ", time.Since(start))
	}()

	file_paths := flag.Args()

	fmt.Fprintln(os.Stderr, "tt starting up")

	// if no estimate supplied, count lines
	if *large && *estimated_lines == 0 {

		var bytes_to_process uint64

		for _, file_path := range file_paths {

			file, err := os.Open(file_path)
			if err != nil {
				log.Fatal(err)
			}

			counter := wc.NewCounter(file)
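			// Count(multibytes, bytes, lines, words): bytes and lines only here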
			err = counter.Count(false, true, true, false)
			if err != nil {
				log.Fatal(err)
			}

			*estimated_lines += counter.Lines
			bytes_to_process += counter.Bytes

			file.Close()
		}

		fmt.Fprintln(os.Stderr, "Bytes to process: ", bytes_to_process)
		fmt.Fprintln(os.Stderr, "Lines to process: ", *estimated_lines)
	}

	if *large {

		if *union {
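			// union: emit each distinct token the first time it is seen,
			// using a Bloom filter as the seen-set (rare false positives
			// may suppress a token)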

			unique_set := NewScalableBloom(*estimated_lines)

			for _, file_path := range file_paths {

				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}

				for e.Scan() {
					token := e.Bytes()
					if !unique_set.Check(token) {
						total_tokens_emitted++
						stdout.Write(token)
						stdout.WriteByte('\n')
						unique_set.Add(token)
					}
				}

				total_lines_scanned += e.LinesScanned

				e.Close()

			}

			return
		}

		// multi file handling below
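		// one Bloom filter per input file; Bloom membership tests can return
		// rare false positives, so -i/-d output is approximate in -large mode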
		sets := make([]bloom.Bloom, len(file_paths))

		// may require throttling due to disk thrashing
		// initial scan to fill the bloom filters
		for i, file_path := range file_paths {

			set := NewScalableBloom(*estimated_lines)

			e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
			if err != nil {
				log.Fatal(err)
			}

			for e.Scan() {
				set.Add(e.Bytes())
			}

			e.Close()

			sets[i] = set

		}

		// do the work
		switch {

		// unique set of tokens that exist in all files
		case *intersection:

			echoed_set := NewScalableBloom(*estimated_lines)

			for _, file_path := range file_paths {

				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}

			NEXT_TOKEN:
				for e.Scan() {

					token := e.Bytes()

					if echoed_set.Check(token) {
						continue NEXT_TOKEN
					}

					for _, set := range sets {
						if !set.Check(token) {
							continue NEXT_TOKEN
						}
					}

					total_tokens_emitted++
					stdout.Write(token)
					stdout.WriteByte('\n')
					echoed_set.Add(token)

				}

				total_lines_scanned += e.LinesScanned

				e.Close()

			}

		// unique set of tokens not in the intersection
		case *diff:

			echoed_set := NewScalableBloom(*estimated_lines)

			for _, file_path := range file_paths {

				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}

				for e.Scan() {

					token := e.Bytes()

					if echoed_set.Check(token) {
						continue
					}

					for _, set := range sets {
						if !set.Check(token) {
							total_tokens_emitted++
							stdout.Write(token)
							stdout.WriteByte('\n')
							echoed_set.Add(token)
							// stop after the first miss so the token is
							// emitted only once (the map branch below does
							// the same with break)
							break
						}
					}

				}

				total_lines_scanned += e.LinesScanned

				e.Close()

			}
		}

	} else {
		// defaults to the exact, map-based solution

		if *union {

			unique_set := make(map[string]int)

			for _, file_path := range file_paths {

				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}

				for e.Scan() {
					unique_set[e.Text()]++
				}

				total_lines_scanned += e.LinesScanned

				e.Close()

			}

			if *count {
				for token, ct := range unique_set {
					total_tokens_emitted++
					fmt.Fprintf(stdout, "%d: %s\n", ct, token)
				}
			} else {
				for token := range unique_set {
					total_tokens_emitted++
					stdout.WriteString(token)
					stdout.WriteByte('\n')
				}
			}

			return
		}

		// multi file handling below
		sets := make([]map[string]bool, len(file_paths))

		// may require throttling due to disk thrashing
		// initial scan to fill the per-file sets
		for i, file_path := range file_paths {

			set := make(map[string]bool)

			e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
			if err != nil {
				log.Fatal(err)
			}

			for e.Scan() {
				set[e.Text()] = true
			}

			e.Close()

			sets[i] = set

		}

		// do the work
		switch {

		// unique set of tokens that exist in all files
		case *intersection:

			echoed_set := make(map[string]bool)

			for _, file_path := range file_paths {

				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}

			NEXT_TOKEN2:
				for e.Scan() {

					token := e.Text()

					if _, echoed := echoed_set[token]; echoed {
						continue NEXT_TOKEN2
					}

					for _, set := range sets {
						if _, in_this_set := set[token]; !in_this_set {
							continue NEXT_TOKEN2
						}
					}

					total_tokens_emitted++
					stdout.WriteString(token)
					stdout.WriteByte('\n')

					echoed_set[token] = true

				}

				total_lines_scanned += e.LinesScanned

				e.Close()

			}

		// unique set of tokens not in the intersection
		case *diff:

			echoed_set := make(map[string]bool)

			for _, file_path := range file_paths {

				e, err := NewEmitter(file_path, *match_regex, *capture_regex, *buffer_size)
				if err != nil {
					log.Fatal(err)
				}

				for e.Scan() {

					token := e.Text()

					if _, echoed := echoed_set[token]; echoed {
						continue
					}

					for _, set := range sets {
						if _, in_this_set := set[token]; !in_this_set {
							total_tokens_emitted++
							stdout.WriteString(token)
							stdout.WriteByte('\n')

							echoed_set[token] = true
							break
						}
					}

				}

				total_lines_scanned += e.LinesScanned

				e.Close()

			}
		}

	}

}
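A hypothetical invocation (file names invented for illustration) that streams the tokens common to two large logs through the Bloom-filter path:

	$ tt -i -large access-2023.log access-2024.log > common.txt

The run report — lines scanned, tokens emitted, elapsed time — goes to stderr via the deferred function, so redirecting stdout keeps the token output clean.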