Exemplo n.º 1
0
// NewHLLPPSketch ...
func NewHLLPPSketch(info *datamodel.Info) (*HLLPPSketch, error) {
	d := HLLPPSketch{info, hllpp.New()}
	return &d, nil
}
Exemplo n.º 2
0
func cmdReport(opts *reportOpts) {
	start := time.Now()

	files, err := filepath.Glob(filepath.Join(opts.dir, fmt.Sprintf("*.%s", tsm1.TSMFileExtension)))
	if err != nil {
		fmt.Printf("%v\n", err)
		os.Exit(1)
	}

	var filtered []string
	if opts.pattern != "" {
		for _, f := range files {
			if strings.Contains(f, opts.pattern) {
				filtered = append(filtered, f)
			}
		}
		files = filtered
	}

	if len(files) == 0 {
		fmt.Printf("no tsm files at %v\n", opts.dir)
		os.Exit(1)
	}

	tw := tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, strings.Join([]string{"File", "Series", "Load Time"}, "\t"))

	totalSeries := hllpp.New()
	tagCardialities := map[string]*hllpp.HLLPP{}
	measCardinalities := map[string]*hllpp.HLLPP{}
	fieldCardinalities := map[string]*hllpp.HLLPP{}

	ordering := make([]chan struct{}, 0, len(files))
	for range files {
		ordering = append(ordering, make(chan struct{}))
	}

	for _, f := range files {
		file, err := os.OpenFile(f, os.O_RDONLY, 0600)
		if err != nil {
			fmt.Fprintf(os.Stderr, "error: %s: %v. Skipping.\n", f, err)
			continue
		}

		loadStart := time.Now()
		reader, err := tsm1.NewTSMReader(file)
		if err != nil {
			fmt.Fprintf(os.Stderr, "error: %s: %v. Skipping.\n", file.Name(), err)
			continue
		}
		loadTime := time.Since(loadStart)

		seriesCount := reader.KeyCount()
		for i := 0; i < seriesCount; i++ {
			key, _ := reader.KeyAt(i)
			totalSeries.Add([]byte(key))

			if opts.detailed {
				sep := strings.Index(key, "#!~#")
				seriesKey, field := key[:sep], key[sep+4:]
				measurement, tags, _ := models.ParseKey(seriesKey)

				measCount, ok := measCardinalities[measurement]
				if !ok {
					measCount = hllpp.New()
					measCardinalities[measurement] = measCount
				}
				measCount.Add([]byte(key))

				fieldCount, ok := fieldCardinalities[measurement]
				if !ok {
					fieldCount = hllpp.New()
					fieldCardinalities[measurement] = fieldCount
				}
				fieldCount.Add([]byte(field))

				for t, v := range tags {
					tagCount, ok := tagCardialities[t]
					if !ok {
						tagCount = hllpp.New()
						tagCardialities[t] = tagCount
					}
					tagCount.Add([]byte(v))
				}
			}
		}
		reader.Close()

		fmt.Fprintln(tw, strings.Join([]string{
			filepath.Base(file.Name()),
			strconv.FormatInt(int64(seriesCount), 10),
			loadTime.String(),
		}, "\t"))
		tw.Flush()
	}

	tw.Flush()
	println()
	fmt.Printf("Statistics\n")
	fmt.Printf("  Series:\n")
	fmt.Printf("    Total (est): %d\n", totalSeries.Count())
	if opts.detailed {
		fmt.Printf("  Measurements (est):\n")
		for t, card := range measCardinalities {
			fmt.Printf("    %v: %d (%d%%)\n", t, card.Count(), int((float64(card.Count())/float64(totalSeries.Count()))*100))
		}

		fmt.Printf("  Fields (est):\n")
		for t, card := range fieldCardinalities {
			fmt.Printf("    %v: %d\n", t, card.Count())
		}

		fmt.Printf("  Tags (est):\n")
		for t, card := range tagCardialities {
			fmt.Printf("    %v: %d\n", t, card.Count())
		}
	}

	fmt.Printf("Completed in %s\n", time.Since(start))
}
Exemplo n.º 3
0
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
	fs := flag.NewFlagSet("report", flag.ExitOnError)
	fs.StringVar(&cmd.pattern, "pattern", "", "Include only files matching a pattern")
	fs.BoolVar(&cmd.detailed, "detailed", false, "Report detailed cardinality estimates")

	fs.SetOutput(cmd.Stdout)
	fs.Usage = cmd.printUsage

	if err := fs.Parse(args); err != nil {
		return err
	}
	cmd.dir = fs.Arg(0)

	start := time.Now()

	files, err := filepath.Glob(filepath.Join(cmd.dir, fmt.Sprintf("*.%s", tsm1.TSMFileExtension)))
	if err != nil {
		return err
	}

	var filtered []string
	if cmd.pattern != "" {
		for _, f := range files {
			if strings.Contains(f, cmd.pattern) {
				filtered = append(filtered, f)
			}
		}
		files = filtered
	}

	if len(files) == 0 {
		return fmt.Errorf("no tsm files at %v\n", cmd.dir)
	}

	tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, strings.Join([]string{"File", "Series", "Load Time"}, "\t"))

	totalSeries := hllpp.New()
	tagCardialities := map[string]*hllpp.HLLPP{}
	measCardinalities := map[string]*hllpp.HLLPP{}
	fieldCardinalities := map[string]*hllpp.HLLPP{}

	ordering := make([]chan struct{}, 0, len(files))
	for range files {
		ordering = append(ordering, make(chan struct{}))
	}

	for _, f := range files {
		file, err := os.OpenFile(f, os.O_RDONLY, 0600)
		if err != nil {
			fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", f, err)
			continue
		}

		loadStart := time.Now()
		reader, err := tsm1.NewTSMReader(file)
		if err != nil {
			fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", file.Name(), err)
			continue
		}
		loadTime := time.Since(loadStart)

		seriesCount := reader.KeyCount()
		for i := 0; i < seriesCount; i++ {
			key, _ := reader.KeyAt(i)
			totalSeries.Add([]byte(key))

			if cmd.detailed {
				sep := strings.Index(string(key), "#!~#")
				seriesKey, field := key[:sep], key[sep+4:]
				measurement, tags, _ := models.ParseKey(seriesKey)

				measCount, ok := measCardinalities[measurement]
				if !ok {
					measCount = hllpp.New()
					measCardinalities[measurement] = measCount
				}
				measCount.Add([]byte(key))

				fieldCount, ok := fieldCardinalities[measurement]
				if !ok {
					fieldCount = hllpp.New()
					fieldCardinalities[measurement] = fieldCount
				}
				fieldCount.Add([]byte(field))

				for _, t := range tags {
					tagCount, ok := tagCardialities[string(t.Key)]
					if !ok {
						tagCount = hllpp.New()
						tagCardialities[string(t.Key)] = tagCount
					}
					tagCount.Add(t.Value)
				}
			}
		}
		reader.Close()

		fmt.Fprintln(tw, strings.Join([]string{
			filepath.Base(file.Name()),
			strconv.FormatInt(int64(seriesCount), 10),
			loadTime.String(),
		}, "\t"))
		tw.Flush()
	}

	tw.Flush()
	println()
	fmt.Printf("Statistics\n")
	fmt.Printf("  Series:\n")
	fmt.Printf("    Total (est): %d\n", totalSeries.Count())
	if cmd.detailed {
		fmt.Printf("  Measurements (est):\n")
		for t, card := range measCardinalities {
			fmt.Printf("    %v: %d (%d%%)\n", t, card.Count(), int((float64(card.Count())/float64(totalSeries.Count()))*100))
		}

		fmt.Printf("  Fields (est):\n")
		for t, card := range fieldCardinalities {
			fmt.Printf("    %v: %d\n", t, card.Count())
		}

		fmt.Printf("  Tags (est):\n")
		for t, card := range tagCardialities {
			fmt.Printf("    %v: %d\n", t, card.Count())
		}
	}

	fmt.Printf("Completed in %s\n", time.Since(start))
	return nil
}