Example #1
0
func TestEncoding_Count(t *testing.T) {
	tests := []struct {
		value     interface{}
		blockType byte
	}{
		{value: float64(1.0), blockType: tsm1.BlockFloat64},
		{value: int64(1), blockType: tsm1.BlockInteger},
		{value: true, blockType: tsm1.BlockBoolean},
		{value: "string", blockType: tsm1.BlockString},
	}

	for _, test := range tests {
		var values []tsm1.Value
		values = append(values, tsm1.NewValue(0, test.value))

		b, err := tsm1.Values(values).Encode(nil)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}

		if got, exp := tsm1.BlockCount(b), 1; got != exp {
			t.Fatalf("block count mismatch: got %v, exp %v", got, exp)
		}
	}
}
Example #2
0
func cmdDumpTsm1(opts *tsdmDumpOpts) {
	var errors []error

	f, err := os.Open(opts.path)
	if err != nil {
		println(err.Error())
		os.Exit(1)
	}

	// Get the file size
	stat, err := f.Stat()
	if err != nil {
		println(err.Error())
		os.Exit(1)
	}

	b := make([]byte, 8)
	f.Read(b[:4])

	// Verify magic number
	if binary.BigEndian.Uint32(b[:4]) != 0x16D116D1 {
		println("Not a tsm1 file.")
		os.Exit(1)
	}

	ids, err := readIds(filepath.Dir(opts.path))
	if err != nil {
		println("Failed to read series:", err.Error())
		os.Exit(1)
	}

	invIds := map[uint64]string{}
	for k, v := range ids {
		invIds[v] = k
	}

	index, err := readIndex(f)
	if err != nil {
		println("Failed to readIndex:", err.Error())

		// Create a stubbed out index so we can still try and read the block data directly
		// w/o panicing ourselves.
		index = &tsmIndex{
			minTime: 0,
			maxTime: 0,
			offset:  stat.Size(),
		}
	}

	blockStats := &blockStats{}

	println("Summary:")
	fmt.Printf("  File: %s\n", opts.path)
	fmt.Printf("  Time Range: %s - %s\n",
		time.Unix(0, index.minTime).UTC().Format(time.RFC3339Nano),
		time.Unix(0, index.maxTime).UTC().Format(time.RFC3339Nano),
	)
	fmt.Printf("  Duration: %s ", time.Unix(0, index.maxTime).Sub(time.Unix(0, index.minTime)))
	fmt.Printf("  Series: %d ", index.series)
	fmt.Printf("  File Size: %d\n", stat.Size())
	println()

	tw := tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, "  "+strings.Join([]string{"Pos", "ID", "Ofs", "Key", "Field"}, "\t"))
	for i, block := range index.blocks {
		key := invIds[block.id]
		split := strings.Split(key, "#!~#")

		// We dont' know know if we have fields so use an informative default
		var measurement, field string = "UNKNOWN", "UNKNOWN"

		// We read some IDs from the ids file
		if len(invIds) > 0 {
			// Change the default to error until we know we have a valid key
			measurement = "ERR"
			field = "ERR"

			// Possible corruption? Try to read as much as we can and point to the problem.
			if key == "" {
				errors = append(errors, fmt.Errorf("index pos %d, field id: %d, missing key for id", i, block.id))
			} else if len(split) < 2 {
				errors = append(errors, fmt.Errorf("index pos %d, field id: %d, key corrupt: got '%v'", i, block.id, key))
			} else {
				measurement = split[0]
				field = split[1]
			}
		}

		if opts.filterKey != "" && !strings.Contains(key, opts.filterKey) {
			continue
		}
		fmt.Fprintln(tw, "  "+strings.Join([]string{
			strconv.FormatInt(int64(i), 10),
			strconv.FormatUint(block.id, 10),
			strconv.FormatInt(int64(block.offset), 10),
			measurement,
			field,
		}, "\t"))
	}

	if opts.dumpIndex {
		println("Index:")
		tw.Flush()
		println()
	}

	tw = tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, "  "+strings.Join([]string{"Blk", "Ofs", "Len", "ID", "Type", "Min Time", "Points", "Enc [T/V]", "Len [T/V]"}, "\t"))

	// Staring at 4 because the magic number is 4 bytes
	i := int64(4)
	var blockCount, pointCount, blockSize int64
	indexSize := stat.Size() - index.offset

	// Start at the beginning and read every block
	for i < index.offset {
		f.Seek(int64(i), 0)

		f.Read(b)
		id := binary.BigEndian.Uint64(b)
		f.Read(b[:4])
		length := binary.BigEndian.Uint32(b[:4])
		buf := make([]byte, length)
		f.Read(buf)

		blockSize += int64(len(buf)) + 12

		startTime := time.Unix(0, int64(binary.BigEndian.Uint64(buf[:8])))
		blockType := buf[8]

		encoded := buf[9:]

		cnt := tsm1.BlockCount(buf)
		pointCount += int64(cnt)

		// Length of the timestamp block
		tsLen, j := binary.Uvarint(encoded)

		// Unpack the timestamp bytes
		ts := encoded[int(j) : int(j)+int(tsLen)]

		// Unpack the value bytes
		values := encoded[int(j)+int(tsLen):]

		tsEncoding := timeEnc[int(ts[0]>>4)]
		vEncoding := encDescs[int(blockType+1)][values[0]>>4]

		typeDesc := blockTypes[blockType]

		blockStats.inc(0, ts[0]>>4)
		blockStats.inc(int(blockType+1), values[0]>>4)
		blockStats.size(len(buf))

		if opts.filterKey != "" && !strings.Contains(invIds[id], opts.filterKey) {
			i += (12 + int64(length))
			blockCount++
			continue
		}

		fmt.Fprintln(tw, "  "+strings.Join([]string{
			strconv.FormatInt(blockCount, 10),
			strconv.FormatInt(i, 10),
			strconv.FormatInt(int64(len(buf)), 10),
			strconv.FormatUint(id, 10),
			typeDesc,
			startTime.UTC().Format(time.RFC3339Nano),
			strconv.FormatInt(int64(cnt), 10),
			fmt.Sprintf("%s/%s", tsEncoding, vEncoding),
			fmt.Sprintf("%d/%d", len(ts), len(values)),
		}, "\t"))

		i += (12 + int64(length))
		blockCount++
	}
	if opts.dumpBlocks {
		println("Blocks:")
		tw.Flush()
		println()
	}

	fmt.Printf("Statistics\n")
	fmt.Printf("  Blocks:\n")
	fmt.Printf("    Total: %d Size: %d Min: %d Max: %d Avg: %d\n",
		blockCount, blockSize, blockStats.min, blockStats.max, blockSize/blockCount)
	fmt.Printf("  Index:\n")
	fmt.Printf("    Total: %d Size: %d\n", len(index.blocks), indexSize)
	fmt.Printf("  Points:\n")
	fmt.Printf("    Total: %d", pointCount)
	println()

	println("  Encoding:")
	for i, counts := range blockStats.counts {
		if len(counts) == 0 {
			continue
		}
		fmt.Printf("    %s: ", strings.Title(fieldType[i]))
		for j, v := range counts {
			fmt.Printf("\t%s: %d (%d%%) ", encDescs[i][j], v, int(float64(v)/float64(blockCount)*100))
		}
		println()
	}
	fmt.Printf("  Compression:\n")
	fmt.Printf("    Per block: %0.2f bytes/point\n", float64(blockSize)/float64(pointCount))
	fmt.Printf("    Total: %0.2f bytes/point\n", float64(stat.Size())/float64(pointCount))

	if len(errors) > 0 {
		println()
		fmt.Printf("Errors (%d):\n", len(errors))
		for _, err := range errors {
			fmt.Printf("  * %v\n", err)
		}
		println()
	}
}