Пример #1
0
// initializeMapFunctions builds one map function and records one field name
// for every function call in the select statement. It only applies to
// aggregate queries.
//
// An error is returned when a map function cannot be initialized, or when a
// call's (possibly nested) first argument is not a field reference.
func (lm *SelectMapper) initializeMapFunctions() error {
	calls := lm.selectStmt.FunctionCalls()
	lm.mapFuncs = make([]influxql.MapFunc, len(calls))
	lm.fieldNames = make([]string, len(lm.mapFuncs))

	for idx, call := range calls {
		var err error
		if lm.mapFuncs[idx], err = influxql.InitializeMapFunc(call); err != nil {
			return err
		}

		// Descend one level for wrapped calls like `derivative(mean(value), 1d)`
		// so the field is read from the inner call.
		target := call
		if inner, isCall := call.Args[0].(*influxql.Call); isCall {
			target = inner
		}

		// Every accepted argument shape records the field and moves on to the
		// next call; anything that falls out of the switch is rejected below.
		switch arg := target.Args[0].(type) {
		case *influxql.VarRef:
			lm.fieldNames[idx] = arg.Val
			continue
		case *influxql.Distinct:
			// A distinct argument is only accepted directly inside count().
			if call.Name == "count" {
				lm.fieldNames[idx] = arg.Val
				continue
			}
		}
		return fmt.Errorf("aggregate call didn't contain a field %s", call.String())
	}

	return nil
}
Пример #2
0
// Begin prepares the mapper to run the map function for the given aggregate
// call, starting at startingTime. A nil call means a raw data query.
//
// An error is returned when the map function cannot be initialized, when the
// aggregate's first argument is not a field reference, or when the selected
// field does not exist on the measurement.
func (l *LocalMapper) Begin(c *influxql.Call, startingTime int64) error {
	fn, err := influxql.InitializeMapFunc(c)
	if err != nil {
		return err
	}
	l.mapFunc = fn

	// One buffered key/value per cursor; the buffers are what let the mapper
	// hand data back in time order.
	l.keyBuffer = make([]int64, len(l.cursors))
	l.valueBuffer = make([][]byte, len(l.cursors))
	l.tmin = startingTime

	// Work out which single field (if any) this mapper reads: the aggregate's
	// argument, or the only selected field of a raw query.
	var name string
	if c != nil {
		ref, isRef := c.Args[0].(*influxql.VarRef)
		if !isRef {
			return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
		}
		name = ref.Val
	} else {
		l.isRaw = true
		if len(l.selectFields) == 1 {
			name = l.selectFields[0].Name
		}
	}

	// Resolve the field through the decoder when a specific one was chosen.
	if name != "" {
		field := l.decoder.FieldByName(name)
		if field == nil {
			return fmt.Errorf("%s isn't a field on measurement %s", name, l.job.MeasurementName)
		}
		l.fieldID, l.fieldName = field.ID, field.Name
	}

	// Seek every cursor to the job's minimum time and prime the buffers.
	for idx, cur := range l.cursors {
		// A nil cursor means the series was never written in this shard group
		// (time range).
		if cur != nil {
			if key, val := cur.Seek(u64tob(uint64(l.job.TMin))); key != nil {
				l.cursorsEmpty = false
				l.keyBuffer[idx] = int64(btou64(key))
				l.valueBuffer[idx] = val
				continue
			}
		}

		// No cursor, or nothing at or after the seek position: leave the slot empty.
		l.keyBuffer[idx] = 0
		l.valueBuffer[idx] = nil
	}
	return nil
}
Пример #3
0
// Begin prepares the mapper to run the map function for the given aggregate
// call, starting at startingTime. A nil call means a raw data query. chunkSize
// is stored on the mapper.
//
// An error is returned when the map function cannot be initialized, when the
// call's (possibly nested) first argument is not a field reference, or when a
// distinct / count(distinct) call names something the decoder cannot resolve
// to a field.
func (l *LocalMapper) Begin(c *influxql.Call, startingTime int64, chunkSize int) error {
	mapFunc, err := influxql.InitializeMapFunc(c)
	if err != nil {
		return err
	}
	l.mapFunc = mapFunc

	// Set up the buffers. These ensure that we return data in time order.
	l.keyBuffer = make([]int64, len(l.cursors))
	l.valueBuffer = make([][]byte, len(l.cursors))
	l.chunkSize = chunkSize
	l.tmin = startingTime

	var isCountDistinct bool

	// Determine if this is a raw data query with a single field, multiple
	// fields, or an aggregate.
	var fieldName string
	if c == nil { // it's a raw data query
		l.isRaw = true
		if len(l.selectFields) == 1 {
			fieldName = l.selectFields[0]
		}

		// A zero limit means none was set; replace it with the largest uint64.
		if l.limit == 0 {
			l.limit = math.MaxUint64
		}
	} else {
		// Check for calls like `derivative(mean(value), 1d)` and read the field
		// from the inner call when there is one.
		nested := c
		if fn, ok := c.Args[0].(*influxql.Call); ok {
			nested = fn
		}

		switch lit := nested.Args[0].(type) {
		case *influxql.VarRef:
			fieldName = lit.Val
		case *influxql.Distinct:
			// A distinct argument is only accepted directly inside count().
			if c.Name != "count" {
				return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
			}
			isCountDistinct = true
			fieldName = lit.Val
		default:
			return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
		}

		// count(distinct(field)) written as nested calls counts as well.
		isCountDistinct = isCountDistinct || (c.Name == "count" && nested.Name == "distinct")
	}

	// Set up the field info if a specific field was set for this mapper.
	if fieldName != "" {
		fid, err := l.decoder.FieldIDByName(fieldName)
		if err != nil {
			switch {
			case c != nil && c.Name == "distinct":
				// The key name is wrapped in a balanced pair of double quotes so
				// the suggested statement can be copied as-is.
				return fmt.Errorf(`%s isn't a field on measurement %s; to query the unique values for a tag use SHOW TAG VALUES FROM %[2]s WITH KEY = "%[1]s"`, fieldName, l.job.MeasurementName)
			case isCountDistinct:
				return fmt.Errorf("%s isn't a field on measurement %s; count(distinct) on tags isn't yet supported", fieldName, l.job.MeasurementName)
			}
			// NOTE(review): for every other call the lookup error is ignored and
			// whatever ID was returned is stored below — presumably deliberate so
			// a field the decoder doesn't know about is not fatal; confirm.
		}
		l.fieldID = fid
		l.fieldName = fieldName
	}

	// Seek the bolt cursors and fill the buffers.
	for i, cur := range l.cursors {
		// This series may have never been written in this shard group (time
		// range), in which case the cursor is nil.
		if cur == nil {
			l.keyBuffer[i] = 0
			l.valueBuffer[i] = nil
			continue
		}
		k, v := cur.Seek(u64tob(uint64(l.job.TMin)))
		if k == nil {
			l.keyBuffer[i] = 0
			l.valueBuffer[i] = nil
			continue
		}
		l.cursorsEmpty = false
		l.keyBuffer[i] = int64(btou64(k))
		l.valueBuffer[i] = v
	}
	return nil
}
Пример #4
0
// Open opens the aggregate mapper.
//
// It begins a read-only transaction on the shard, initializes one map
// function and field name per function call in the statement, derives the
// query time range and the number of GROUP BY intervals, and creates a tag-set
// cursor for every tag set of every source measurement.
//
// Open returns nil without creating cursors when the statement's OFFSET is
// larger than the number of intervals, and stops (returning nil) at the first
// source measurement the shard's index does not know. It returns an error when
// the transaction cannot be started, a map function cannot be initialized, an
// aggregate does not reference a field, the interval count exceeds
// MaxGroupByPoints, a source is not a measurement, or GROUP BY validation
// fails.
//
// NOTE(review): the transaction is stored on am.tx and is not rolled back on
// the error paths below — presumably the caller releases it when closing the
// mapper; confirm.
func (am *AggMapper) Open() error {
	// Get a read-only transaction.
	tx, err := am.shard.DB().Begin(false)
	if err != nil {
		return err
	}
	am.tx = tx

	// Set up each mapping function for this statement.
	aggregates := am.stmt.FunctionCalls()
	am.mapFuncs = make([]influxql.MapFunc, len(aggregates))
	am.fieldNames = make([]string, len(am.mapFuncs))
	for i, c := range aggregates {
		am.mapFuncs[i], err = influxql.InitializeMapFunc(c)
		if err != nil {
			return err
		}

		// Check for calls like `derivative(mean(value), 1d)` and read the field
		// from the inner call when there is one.
		nested := c
		if fn, ok := c.Args[0].(*influxql.Call); ok {
			nested = fn
		}
		switch lit := nested.Args[0].(type) {
		case *influxql.VarRef:
			am.fieldNames[i] = lit.Val
		case *influxql.Distinct:
			// A distinct argument is only accepted directly inside count().
			if c.Name != "count" {
				return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
			}
			am.fieldNames[i] = lit.Val
		default:
			return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
		}
	}

	// Set all time-related parameters on the mapper.
	am.queryTMin, am.queryTMax = influxql.TimeRangeAsEpochNano(am.stmt.Condition)

	// For GROUP BY time queries, limit the number of data points returned by
	// the limit and offset.
	d, err := am.stmt.GroupByInterval()
	if err != nil {
		return err
	}
	am.intervalSize = d.Nanoseconds()
	if am.queryTMin == 0 || am.intervalSize == 0 {
		// Zero minimum time or zero interval: treat the whole range as a
		// single interval.
		am.numIntervals = 1
		am.intervalSize = am.queryTMax - am.queryTMin
	} else {
		// Round the bounds out to interval boundaries and count the buckets.
		intervalTop := am.queryTMax/am.intervalSize*am.intervalSize + am.intervalSize
		intervalBottom := am.queryTMin / am.intervalSize * am.intervalSize
		am.numIntervals = int((intervalTop - intervalBottom) / am.intervalSize)
	}

	if am.stmt.Limit > 0 || am.stmt.Offset > 0 {
		// Ensure that the offset isn't higher than the number of points we'd get.
		if am.stmt.Offset > am.numIntervals {
			return nil
		}

		// Take the lesser of either the pre-computed number of GROUP BY buckets
		// that will be in the result or the limit passed in by the user. A zero
		// Limit means only OFFSET was given; clamping to it would discard every
		// interval, so it is skipped.
		if am.stmt.Limit > 0 && am.stmt.Limit < am.numIntervals {
			am.numIntervals = am.stmt.Limit
		}
	}

	// If we are exceeding our MaxGroupByPoints error out.
	if am.numIntervals > MaxGroupByPoints {
		return errors.New("too many points in the group by interval. maybe you forgot to specify a where time clause?")
	}

	// Ensure that the start time for the results is on the start of the window.
	am.queryTMinWindow = am.queryTMin
	if am.intervalSize > 0 && am.numIntervals > 1 {
		am.queryTMinWindow = am.queryTMinWindow / am.intervalSize * am.intervalSize
	}

	// Create the TagSet cursors for the Mapper.
	for _, src := range am.stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return fmt.Errorf("invalid source type: %#v", src)
		}

		m := am.shard.index.Measurement(mm.Name)
		if m == nil {
			// This shard has never received data for the measurement. No Mapper
			// required.
			return nil
		}

		// Create tagset cursors and determine various field types within the
		// SELECT statement.
		tsf, err := createTagSetsAndFields(m, am.stmt)
		if err != nil {
			return err
		}
		tagSets := tsf.tagSets
		am.selectFields = tsf.selectFields
		am.selectTags = tsf.selectTags
		am.whereFields = tsf.whereFields

		// Validate that group by is not a field.
		if err := m.ValidateGroupBy(am.stmt); err != nil {
			return err
		}

		// SLIMIT and SOFFSET the unique series. The upper bound is kept in a
		// local so the statement is not modified: clamping am.stmt.SLimit in
		// place would carry one source's series count over to the next source.
		if am.stmt.SLimit > 0 || am.stmt.SOffset > 0 {
			if am.stmt.SOffset > len(tagSets) {
				tagSets = nil
			} else {
				end := len(tagSets)
				// A zero SLimit means only SOFFSET was given: keep everything
				// after the offset instead of slicing to an empty range.
				if am.stmt.SLimit > 0 && am.stmt.SOffset+am.stmt.SLimit < end {
					end = am.stmt.SOffset + am.stmt.SLimit
				}
				tagSets = tagSets[am.stmt.SOffset:end]
			}
		}

		// Create all cursors for reading the data from this shard.
		for _, t := range tagSets {
			cursors := []*seriesCursor{}

			for i, key := range t.SeriesKeys {
				c := createCursorForSeries(am.tx, am.shard, key)
				if c == nil {
					// No data exists for this key.
					continue
				}
				cm := newSeriesCursor(c, t.Filters[i])
				cursors = append(cursors, cm)
			}
			tsc := newTagSetCursor(m.Name, t.Tags, cursors, am.shard.FieldCodec(m.Name))
			am.cursors = append(am.cursors, tsc)
		}

		// Sorted after every source (not once at the end) so the cursors are
		// already ordered if a later source returns early above.
		sort.Sort(tagSetCursors(am.cursors))
	}

	return nil
}