Example #1
0
// addToIndexFromKey splits a composite key into its series key and field name,
// derives the measurement name from the series key, and registers the
// measurement, field, and series with the database index and with
// measurementFields.
//
// A MeasurementFields entry is created in measurementFields the first time a
// measurement is seen. The only error returned is the one reported by
// CreateFieldIfNotExists.
func (e *Engine) addToIndexFromKey(key string, fieldType influxql.DataType, index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error {
	seriesKey, field := seriesAndFieldFromCompositeKey(key)
	measurement := tsdb.MeasurementFromSeriesKey(seriesKey)

	m := index.CreateMeasurementIndexIfNotExists(measurement)
	m.SetFieldName(field)

	mf := measurementFields[measurement]
	if mf == nil {
		mf = &tsdb.MeasurementFields{
			Fields: map[string]*tsdb.Field{},
		}
		measurementFields[measurement] = mf
	}

	if err := mf.CreateFieldIfNotExists(field, fieldType, false); err != nil {
		return err
	}

	// The parse error is deliberately ignored: a series key carries no fields
	// (in line protocol format), so ParseKey is expected to report "missing
	// fields" here. The previous check returned early when err was nil, which
	// skipped indexing the series exactly when parsing succeeded.
	_, tags, _ := models.ParseKey(seriesKey)

	s := tsdb.NewSeries(seriesKey, tags)
	s.InitializeShards()
	index.CreateSeriesIndexIfNotExists(measurement, s)

	return nil
}
Example #2
0
// addToIndexFromKey registers the measurement, field, and series encoded in a
// composite key with the database index and the engine's measurement fields,
// and assigns shardID to the series.
func (e *Engine) addToIndexFromKey(shardID uint64, key []byte, fieldType influxql.DataType, index *tsdb.DatabaseIndex) error {
	seriesKey, fieldName := SeriesAndFieldFromCompositeKey(key)
	name := tsdb.MeasurementFromSeriesKey(string(seriesKey))

	measurementIndex := index.CreateMeasurementIndexIfNotExists(name)
	measurementIndex.SetFieldName(fieldName)

	// Lazily create the field set for a measurement seen for the first time.
	fields := e.measurementFields[name]
	if fields == nil {
		fields = tsdb.NewMeasurementFields()
		e.measurementFields[name] = fields
	}

	if err := fields.CreateFieldIfNotExists(fieldName, fieldType, false); err != nil {
		return err
	}

	// A series that is already indexed only needs this shard added to it.
	if existing := index.SeriesBytes(seriesKey); existing != nil {
		existing.AssignShard(shardID)
		return nil
	}

	// ParseKey's error is discarded on purpose: it reports "missing fields"
	// because a series key has no fields (in line protocol format).
	_, tags, _ := models.ParseKey(seriesKey)

	series := tsdb.NewSeries(string(seriesKey), tags)
	index.CreateSeriesIndexIfNotExists(name, series)
	series.AssignShard(shardID)

	return nil
}
Example #3
0
// MeasurementFromSeriesKey extracts the measurement name from a series key
// and returns it unescaped.
func MeasurementFromSeriesKey(key string) string {
	// The error from ParseKey is dropped deliberately: it reports
	// "missing fields" for a key of this form.
	name, _, _ := models.ParseKey(key)
	unescaped := escape.UnescapeString(name)
	return unescaped
}
Example #4
0
// cmdReport prints a report about the TSM files found directly in opts.dir.
//
// Files are matched by the TSM file extension and, when opts.pattern is
// non-empty, kept only if their path contains that substring. For each file
// that can be opened and read, one table row is printed with the file name,
// its key count, and the time taken to load it. Files that fail to open or
// load are reported on stderr and skipped.
//
// After the table, an estimated total series count is printed. When
// opts.detailed is set, estimated per-measurement, per-field, and per-tag
// cardinalities are printed as well.
//
// The process exits with status 1 if the glob fails or no files match.
func cmdReport(opts *reportOpts) {
	// Separator between the series key and the field name in a composite key.
	const keyFieldSeparator = "#!~#"

	start := time.Now()

	files, err := filepath.Glob(filepath.Join(opts.dir, fmt.Sprintf("*.%s", tsm1.TSMFileExtension)))
	if err != nil {
		fmt.Printf("%v\n", err)
		os.Exit(1)
	}

	var filtered []string
	if opts.pattern != "" {
		for _, f := range files {
			if strings.Contains(f, opts.pattern) {
				filtered = append(filtered, f)
			}
		}
		files = filtered
	}

	if len(files) == 0 {
		fmt.Printf("no tsm files at %v\n", opts.dir)
		os.Exit(1)
	}

	tw := tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, strings.Join([]string{"File", "Series", "Load Time"}, "\t"))

	totalSeries := hllpp.New()
	tagCardinalities := map[string]*hllpp.HLLPP{}
	measCardinalities := map[string]*hllpp.HLLPP{}
	fieldCardinalities := map[string]*hllpp.HLLPP{}

	for _, f := range files {
		file, err := os.OpenFile(f, os.O_RDONLY, 0600)
		if err != nil {
			fmt.Fprintf(os.Stderr, "error: %s: %v. Skipping.\n", f, err)
			continue
		}

		loadStart := time.Now()
		reader, err := tsm1.NewTSMReader(file)
		if err != nil {
			fmt.Fprintf(os.Stderr, "error: %s: %v. Skipping.\n", file.Name(), err)
			// No reader was returned to close on this path, so close the file
			// here rather than leaking its descriptor.
			file.Close()
			continue
		}
		loadTime := time.Since(loadStart)

		seriesCount := reader.KeyCount()
		for i := 0; i < seriesCount; i++ {
			key, _ := reader.KeyAt(i)
			totalSeries.Add([]byte(key))

			if !opts.detailed {
				continue
			}

			sep := strings.Index(key, keyFieldSeparator)
			if sep < 0 {
				// Without the separator the key cannot be split; slicing with
				// -1 would panic, so report it and move on.
				fmt.Fprintf(os.Stderr, "error: %s: malformed key %q. Skipping.\n", file.Name(), key)
				continue
			}
			seriesKey, field := key[:sep], key[sep+len(keyFieldSeparator):]
			// Error ignored: only the measurement and tags are needed here.
			measurement, tags, _ := models.ParseKey(seriesKey)

			measCount, ok := measCardinalities[measurement]
			if !ok {
				measCount = hllpp.New()
				measCardinalities[measurement] = measCount
			}
			measCount.Add([]byte(key))

			// Field cardinality is tracked per measurement.
			fieldCount, ok := fieldCardinalities[measurement]
			if !ok {
				fieldCount = hllpp.New()
				fieldCardinalities[measurement] = fieldCount
			}
			fieldCount.Add([]byte(field))

			for t, v := range tags {
				tagCount, ok := tagCardinalities[t]
				if !ok {
					tagCount = hllpp.New()
					tagCardinalities[t] = tagCount
				}
				tagCount.Add([]byte(v))
			}
		}
		reader.Close()

		fmt.Fprintln(tw, strings.Join([]string{
			filepath.Base(file.Name()),
			strconv.FormatInt(int64(seriesCount), 10),
			loadTime.String(),
		}, "\t"))
		// Flush after every file so rows appear as each file is processed.
		tw.Flush()
	}

	tw.Flush()
	// fmt.Println keeps the blank separator line on stdout with the rest of
	// the report; the builtin println writes to stderr.
	fmt.Println()
	fmt.Printf("Statistics\n")
	fmt.Printf("  Series:\n")
	fmt.Printf("    Total (est): %d\n", totalSeries.Count())
	if opts.detailed {
		fmt.Printf("  Measurements (est):\n")
		for t, card := range measCardinalities {
			fmt.Printf("    %v: %d (%d%%)\n", t, card.Count(), int((float64(card.Count())/float64(totalSeries.Count()))*100))
		}

		fmt.Printf("  Fields (est):\n")
		for t, card := range fieldCardinalities {
			fmt.Printf("    %v: %d\n", t, card.Count())
		}

		fmt.Printf("  Tags (est):\n")
		for t, card := range tagCardinalities {
			fmt.Printf("    %v: %d\n", t, card.Count())
		}
	}

	fmt.Printf("Completed in %s\n", time.Since(start))
}
Example #5
0
// Run executes the report command.
//
// args are parsed for the -pattern and -detailed flags; the first positional
// argument is the directory to scan. Files in that directory are matched by
// the TSM file extension and, when a pattern is given, kept only if their path
// contains that substring.
//
// For each file that can be opened and read, one table row is written to
// cmd.Stdout with the file name, its key count, and the time taken to load it.
// Files that fail to open or load are reported on cmd.Stderr and skipped. The
// table is followed by an estimated total series count and, with -detailed,
// estimated per-measurement, per-field, and per-tag cardinalities.
//
// Run returns an error if flag parsing or the glob fails, or if no files match.
func (cmd *Command) Run(args ...string) error {
	// Separator between the series key and the field name in a composite key.
	const keyFieldSeparator = "#!~#"

	fs := flag.NewFlagSet("report", flag.ExitOnError)
	fs.StringVar(&cmd.pattern, "pattern", "", "Include only files matching a pattern")
	fs.BoolVar(&cmd.detailed, "detailed", false, "Report detailed cardinality estimates")

	fs.SetOutput(cmd.Stdout)
	fs.Usage = cmd.printUsage

	if err := fs.Parse(args); err != nil {
		return err
	}
	cmd.dir = fs.Arg(0)

	start := time.Now()

	files, err := filepath.Glob(filepath.Join(cmd.dir, fmt.Sprintf("*.%s", tsm1.TSMFileExtension)))
	if err != nil {
		return err
	}

	var filtered []string
	if cmd.pattern != "" {
		for _, f := range files {
			if strings.Contains(f, cmd.pattern) {
				filtered = append(filtered, f)
			}
		}
		files = filtered
	}

	if len(files) == 0 {
		return fmt.Errorf("no tsm files at %v\n", cmd.dir)
	}

	tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, strings.Join([]string{"File", "Series", "Load Time"}, "\t"))

	totalSeries := hllpp.New()
	tagCardinalities := map[string]*hllpp.HLLPP{}
	measCardinalities := map[string]*hllpp.HLLPP{}
	fieldCardinalities := map[string]*hllpp.HLLPP{}

	for _, f := range files {
		file, err := os.OpenFile(f, os.O_RDONLY, 0600)
		if err != nil {
			fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", f, err)
			continue
		}

		loadStart := time.Now()
		reader, err := tsm1.NewTSMReader(file)
		if err != nil {
			fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", file.Name(), err)
			// No reader was returned to close on this path, so close the file
			// here rather than leaking its descriptor.
			file.Close()
			continue
		}
		loadTime := time.Since(loadStart)

		seriesCount := reader.KeyCount()
		for i := 0; i < seriesCount; i++ {
			key, _ := reader.KeyAt(i)
			totalSeries.Add([]byte(key))

			if !cmd.detailed {
				continue
			}

			sep := strings.Index(string(key), keyFieldSeparator)
			if sep < 0 {
				// Without the separator the key cannot be split; slicing with
				// -1 would panic, so report it and move on.
				fmt.Fprintf(cmd.Stderr, "error: %s: malformed key %q. Skipping.\n", file.Name(), key)
				continue
			}
			seriesKey, field := key[:sep], key[sep+len(keyFieldSeparator):]
			// Error ignored: only the measurement and tags are needed here.
			measurement, tags, _ := models.ParseKey(seriesKey)

			measCount, ok := measCardinalities[measurement]
			if !ok {
				measCount = hllpp.New()
				measCardinalities[measurement] = measCount
			}
			measCount.Add([]byte(key))

			// Field cardinality is tracked per measurement.
			fieldCount, ok := fieldCardinalities[measurement]
			if !ok {
				fieldCount = hllpp.New()
				fieldCardinalities[measurement] = fieldCount
			}
			fieldCount.Add([]byte(field))

			for _, t := range tags {
				tagCount, ok := tagCardinalities[string(t.Key)]
				if !ok {
					tagCount = hllpp.New()
					tagCardinalities[string(t.Key)] = tagCount
				}
				tagCount.Add(t.Value)
			}
		}
		reader.Close()

		fmt.Fprintln(tw, strings.Join([]string{
			filepath.Base(file.Name()),
			strconv.FormatInt(int64(seriesCount), 10),
			loadTime.String(),
		}, "\t"))
		// Flush after every file so rows appear as each file is processed.
		tw.Flush()
	}

	tw.Flush()
	// The statistics go to cmd.Stdout like the table above, so the whole
	// report lands on the writer the command was configured with. Previously
	// they went to the process stdout, and the blank line to stderr via the
	// builtin println.
	fmt.Fprintln(cmd.Stdout)
	fmt.Fprintf(cmd.Stdout, "Statistics\n")
	fmt.Fprintf(cmd.Stdout, "  Series:\n")
	fmt.Fprintf(cmd.Stdout, "    Total (est): %d\n", totalSeries.Count())
	if cmd.detailed {
		fmt.Fprintf(cmd.Stdout, "  Measurements (est):\n")
		for t, card := range measCardinalities {
			fmt.Fprintf(cmd.Stdout, "    %v: %d (%d%%)\n", t, card.Count(), int((float64(card.Count())/float64(totalSeries.Count()))*100))
		}

		fmt.Fprintf(cmd.Stdout, "  Fields (est):\n")
		for t, card := range fieldCardinalities {
			fmt.Fprintf(cmd.Stdout, "    %v: %d\n", t, card.Count())
		}

		fmt.Fprintf(cmd.Stdout, "  Tags (est):\n")
		for t, card := range tagCardinalities {
			fmt.Fprintf(cmd.Stdout, "    %v: %d\n", t, card.Count())
		}
	}

	fmt.Fprintf(cmd.Stdout, "Completed in %s\n", time.Since(start))
	return nil
}