// addToIndexFromKey will pull the measurement name, series key, and field name from a composite key and add it to the // database index and measurement fields func (e *Engine) addToIndexFromKey(key string, fieldType influxql.DataType, index *tsdb.DatabaseIndex, measurementFields map[string]*tsdb.MeasurementFields) error { seriesKey, field := seriesAndFieldFromCompositeKey(key) measurement := tsdb.MeasurementFromSeriesKey(seriesKey) m := index.CreateMeasurementIndexIfNotExists(measurement) m.SetFieldName(field) mf := measurementFields[measurement] if mf == nil { mf = &tsdb.MeasurementFields{ Fields: map[string]*tsdb.Field{}, } measurementFields[measurement] = mf } if err := mf.CreateFieldIfNotExists(field, fieldType, false); err != nil { return err } _, tags, err := models.ParseKey(seriesKey) if err == nil { return err } s := tsdb.NewSeries(seriesKey, tags) s.InitializeShards() index.CreateSeriesIndexIfNotExists(measurement, s) return nil }
// addToIndexFromKey will pull the measurement name, series key, and field name from a composite key and add it to the // database index and measurement fields func (e *Engine) addToIndexFromKey(shardID uint64, key []byte, fieldType influxql.DataType, index *tsdb.DatabaseIndex) error { seriesKey, field := SeriesAndFieldFromCompositeKey(key) measurement := tsdb.MeasurementFromSeriesKey(string(seriesKey)) m := index.CreateMeasurementIndexIfNotExists(measurement) m.SetFieldName(field) mf := e.measurementFields[measurement] if mf == nil { mf = tsdb.NewMeasurementFields() e.measurementFields[measurement] = mf } if err := mf.CreateFieldIfNotExists(field, fieldType, false); err != nil { return err } // Have we already indexed this series? ss := index.SeriesBytes(seriesKey) if ss != nil { // Add this shard to the existing series ss.AssignShard(shardID) return nil } // ignore error because ParseKey returns "missing fields" and we don't have // fields (in line protocol format) in the series key _, tags, _ := models.ParseKey(seriesKey) s := tsdb.NewSeries(string(seriesKey), tags) index.CreateSeriesIndexIfNotExists(measurement, s) s.AssignShard(shardID) return nil }
// MeasurementFromSeriesKey returns the name of the measurement from a key that // contains a measurement name. func MeasurementFromSeriesKey(key string) string { // Ignoring the error because the func returns "missing fields" k, _, _ := models.ParseKey(key) return escape.UnescapeString(k) }
func cmdReport(opts *reportOpts) { start := time.Now() files, err := filepath.Glob(filepath.Join(opts.dir, fmt.Sprintf("*.%s", tsm1.TSMFileExtension))) if err != nil { fmt.Printf("%v\n", err) os.Exit(1) } var filtered []string if opts.pattern != "" { for _, f := range files { if strings.Contains(f, opts.pattern) { filtered = append(filtered, f) } } files = filtered } if len(files) == 0 { fmt.Printf("no tsm files at %v\n", opts.dir) os.Exit(1) } tw := tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0) fmt.Fprintln(tw, strings.Join([]string{"File", "Series", "Load Time"}, "\t")) totalSeries := hllpp.New() tagCardialities := map[string]*hllpp.HLLPP{} measCardinalities := map[string]*hllpp.HLLPP{} fieldCardinalities := map[string]*hllpp.HLLPP{} ordering := make([]chan struct{}, 0, len(files)) for range files { ordering = append(ordering, make(chan struct{})) } for _, f := range files { file, err := os.OpenFile(f, os.O_RDONLY, 0600) if err != nil { fmt.Fprintf(os.Stderr, "error: %s: %v. Skipping.\n", f, err) continue } loadStart := time.Now() reader, err := tsm1.NewTSMReader(file) if err != nil { fmt.Fprintf(os.Stderr, "error: %s: %v. Skipping.\n", file.Name(), err) continue } loadTime := time.Since(loadStart) seriesCount := reader.KeyCount() for i := 0; i < seriesCount; i++ { key, _ := reader.KeyAt(i) totalSeries.Add([]byte(key)) if opts.detailed { sep := strings.Index(key, "#!~#") seriesKey, field := key[:sep], key[sep+4:] measurement, tags, _ := models.ParseKey(seriesKey) measCount, ok := measCardinalities[measurement] if !ok { measCount = hllpp.New() measCardinalities[measurement] = measCount } measCount.Add([]byte(key)) fieldCount, ok := fieldCardinalities[measurement] if !ok { fieldCount = hllpp.New() fieldCardinalities[measurement] = fieldCount } fieldCount.Add([]byte(field)) for t, v := range tags { tagCount, ok := tagCardialities[t] if !ok { tagCount = hllpp.New() tagCardialities[t] = tagCount } tagCount.Add([]byte(v)) } } } reader.Close() fmt.Fprintln(tw, strings.Join([]string{ filepath.Base(file.Name()), strconv.FormatInt(int64(seriesCount), 10), loadTime.String(), }, "\t")) tw.Flush() } tw.Flush() println() fmt.Printf("Statistics\n") fmt.Printf(" Series:\n") fmt.Printf(" Total (est): %d\n", totalSeries.Count()) if opts.detailed { fmt.Printf(" Measurements (est):\n") for t, card := range measCardinalities { fmt.Printf(" %v: %d (%d%%)\n", t, card.Count(), int((float64(card.Count())/float64(totalSeries.Count()))*100)) } fmt.Printf(" Fields (est):\n") for t, card := range fieldCardinalities { fmt.Printf(" %v: %d\n", t, card.Count()) } fmt.Printf(" Tags (est):\n") for t, card := range tagCardialities { fmt.Printf(" %v: %d\n", t, card.Count()) } } fmt.Printf("Completed in %s\n", time.Since(start)) }
// Run executes the command. func (cmd *Command) Run(args ...string) error { fs := flag.NewFlagSet("report", flag.ExitOnError) fs.StringVar(&cmd.pattern, "pattern", "", "Include only files matching a pattern") fs.BoolVar(&cmd.detailed, "detailed", false, "Report detailed cardinality estimates") fs.SetOutput(cmd.Stdout) fs.Usage = cmd.printUsage if err := fs.Parse(args); err != nil { return err } cmd.dir = fs.Arg(0) start := time.Now() files, err := filepath.Glob(filepath.Join(cmd.dir, fmt.Sprintf("*.%s", tsm1.TSMFileExtension))) if err != nil { return err } var filtered []string if cmd.pattern != "" { for _, f := range files { if strings.Contains(f, cmd.pattern) { filtered = append(filtered, f) } } files = filtered } if len(files) == 0 { return fmt.Errorf("no tsm files at %v\n", cmd.dir) } tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0) fmt.Fprintln(tw, strings.Join([]string{"File", "Series", "Load Time"}, "\t")) totalSeries := hllpp.New() tagCardialities := map[string]*hllpp.HLLPP{} measCardinalities := map[string]*hllpp.HLLPP{} fieldCardinalities := map[string]*hllpp.HLLPP{} ordering := make([]chan struct{}, 0, len(files)) for range files { ordering = append(ordering, make(chan struct{})) } for _, f := range files { file, err := os.OpenFile(f, os.O_RDONLY, 0600) if err != nil { fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", f, err) continue } loadStart := time.Now() reader, err := tsm1.NewTSMReader(file) if err != nil { fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", file.Name(), err) continue } loadTime := time.Since(loadStart) seriesCount := reader.KeyCount() for i := 0; i < seriesCount; i++ { key, _ := reader.KeyAt(i) totalSeries.Add([]byte(key)) if cmd.detailed { sep := strings.Index(string(key), "#!~#") seriesKey, field := key[:sep], key[sep+4:] measurement, tags, _ := models.ParseKey(seriesKey) measCount, ok := measCardinalities[measurement] if !ok { measCount = hllpp.New() measCardinalities[measurement] = measCount } measCount.Add([]byte(key)) fieldCount, ok := fieldCardinalities[measurement] if !ok { fieldCount = hllpp.New() fieldCardinalities[measurement] = fieldCount } fieldCount.Add([]byte(field)) for _, t := range tags { tagCount, ok := tagCardialities[string(t.Key)] if !ok { tagCount = hllpp.New() tagCardialities[string(t.Key)] = tagCount } tagCount.Add(t.Value) } } } reader.Close() fmt.Fprintln(tw, strings.Join([]string{ filepath.Base(file.Name()), strconv.FormatInt(int64(seriesCount), 10), loadTime.String(), }, "\t")) tw.Flush() } tw.Flush() println() fmt.Printf("Statistics\n") fmt.Printf(" Series:\n") fmt.Printf(" Total (est): %d\n", totalSeries.Count()) if cmd.detailed { fmt.Printf(" Measurements (est):\n") for t, card := range measCardinalities { fmt.Printf(" %v: %d (%d%%)\n", t, card.Count(), int((float64(card.Count())/float64(totalSeries.Count()))*100)) } fmt.Printf(" Fields (est):\n") for t, card := range fieldCardinalities { fmt.Printf(" %v: %d\n", t, card.Count()) } fmt.Printf(" Tags (est):\n") for t, card := range tagCardialities { fmt.Printf(" %v: %d\n", t, card.Count()) } } fmt.Printf("Completed in %s\n", time.Since(start)) return nil }