func main() { var inputs []io.Reader if flag.NArg() > 0 { for _, file_path := range flag.Args() { file, err := os.Open(file_path) if err != nil { log.Fatal(err) } inputs = append(inputs, file) } } else { inputs = []io.Reader{os.Stdin} } start := time.Now() filter := func(c rune) bool { return !unicode.IsLetter(c) && !unicode.IsNumber(c) } switch { case *words: dict := oc.NewOc() if *stem { const chunksize = 64 << 10 stems := make(chan string, chunksize) stemswg := new(sync.WaitGroup) stemswg.Add(1) go func() { for word := range stems { dict.Increment(word, 1) } stemswg.Done() }() wg := new(sync.WaitGroup) for _, input := range inputs { br := bufio.NewReaderSize(input, 1<<20) for { buf := make([]byte, chunksize) n, err := br.Read(buf) if err != nil && err != io.EOF { log.Fatal(err) } if n == 0 { break } buf = buf[:n] // read to a newline to prevent // spanning a word across two buffers if buf[len(buf)-1] != '\n' { extra, err := br.ReadBytes('\n') if err != nil && err != io.EOF { log.Fatal(err) } buf = append(buf, extra...) } wg.Add(1) go func(b []byte) { for _, word := range strings.FieldsFunc(string(b), filter) { stems <- english.Stem(word, false) } wg.Done() }(buf) } } wg.Wait() close(stems) stemswg.Wait() } else { for _, input := range inputs { buf := bufio.NewReader(input) for { line, err := buf.ReadString('\n') if err != nil { if err == io.EOF { break } log.Fatal(err) } for _, word := range strings.FieldsFunc(line, filter) { dict.Increment(word, 1) } } } } dict.SortByCt(oc.DESC) for dict.Next() { key, ct := dict.KeyValue() fmt.Printf("%s: %d\n", key, ct) } fmt.Fprintf(os.Stderr, "%d words counted in %s\n", dict.Len(), time.Since(start)) default: dict := oc.NewOc() for _, input := range inputs { buf := bufio.NewReader(input) for { r, _, err := buf.ReadRune() if err != nil { if err == io.EOF { break } log.Fatal(err) } dict.Increment(strconv.QuoteRune(r), 1) } } dict.SortByCt(oc.DESC) for dict.Next() { key, ct := dict.KeyValue() fmt.Printf("%s: %d\n", key, ct) } fmt.Fprintf(os.Stderr, "%d runes counted in %s\n", dict.Len(), time.Since(start)) } }
func main() { fmt.Println("running...") defer fmt.Println("done!") nums := make([]int64, 1<<20) for i, _ := range nums { nums[i] = int64(i) } bnums := []byte{} bn := (*reflect.SliceHeader)(unsafe.Pointer(&bnums)) bn.Data = (uintptr)(unsafe.Pointer(&nums[0])) bn.Cap = cap(nums) * 8 bn.Len = len(nums) * 8 set := oc.NewOc() set.Increment("[]int64", len(nums)*8) set.Increment("[]byte", len(bnums)) var buf bytes.Buffer w := gzip.NewWriter(&buf) w.Write(bnums) w.Close() set.Increment("gzip []byte", buf.Len()) buf.Reset() json.NewEncoder(&buf).Encode(bnums) set.Increment("json []int64", buf.Len()) buf.Reset() w.Reset(&buf) json.NewEncoder(w).Encode(bnums) w.Close() set.Increment("gzip json []int64", buf.Len()) buf.Reset() gob.NewEncoder(&buf).Encode(bnums) set.Increment("gob []int64", buf.Len()) buf.Reset() w.Reset(&buf) gob.NewEncoder(w).Encode(bnums) w.Close() set.Increment("gzip gob []int64", buf.Len()) buf.Reset() msgpack.NewEncoder(&buf).Encode(bnums) set.Increment("msgpack []int64", buf.Len()) buf.Reset() w.Reset(&buf) msgpack.NewEncoder(w).Encode(bnums) w.Close() set.Increment("gzip msgpack []int64", buf.Len()) data, _ := bson.Marshal(bnums) set.Increment("bson []int64", len(data)) buf.Reset() w.Reset(&buf) w.Write(data) w.Close() set.Increment("gzip bson []int64", buf.Len()) set.SortByCt(oc.ASC) for set.Next() { fmt.Println(set.KeyValue()) } }
func TestHashSource(t *testing.T) { const ( set_size = 16 runs = 1 << 20 ) tabw := tabwriter.NewWriter(os.Stdout, 16, 8, 1, '\t', 0) defer tabw.Flush() fmt.Fprintln(tabw, "name\tmin\tmax\tdev\tdist\tmean\tstddev") seed := NewCryptoRandSeed() for _, name := range names { source := rand.New(NewHashSource(hashes[name], seed)) set := oc.NewOc() var numbers []float64 for i := 0; i < runs; i++ { numbers = append(numbers, float64(source.Int63())) set.Increment(strconv.Itoa(source.Intn(set_size)), 1) } if set.Len() != set_size { t.Errorf("Expected full distribution across set for %s, got %d", name, set.Len()) } set.SortByCt(oc.DESC) var min, max int64 for set.Next() { k, vi := set.KeyValue() v := int64(vi) if min == 0 || v < min { min = v } if v > max { max = v } _ = k // fmt.Println(k, "\t", v) } stats := getStats(numbers) fmt.Fprintf(tabw, "%s\t%d\t%d\t%d\t%f\t%.0f\t%.0f\n", name, min, max, max-min, 1-(float64(max-min)/runs), stats.mean, stats.stdDev) tabw.Flush() } source := rand.New(rand.NewSource(seed)) set := oc.NewOc() var numbers []float64 for i := 0; i < runs; i++ { numbers = append(numbers, float64(source.Int63())) set.Increment(strconv.Itoa(source.Intn(set_size)), 1) } set.SortByCt(oc.DESC) var min, max int64 for set.Next() { _, vi := set.KeyValue() v := int64(vi) if min == 0 || v < min { min = v } if v > max { max = v } } stats := getStats(numbers) fmt.Fprintf(tabw, "%s\t%d\t%d\t%d\t%f\t%.0f\t%.0f\n", "math/rand", min, max, max-min, 1-(float64(max-min)/runs), stats.mean, stats.stdDev) }