// initializeMapFunctions initialize the mapping functions for the mapper. This only applies // to aggregate queries. func (lm *SelectMapper) initializeMapFunctions() error { var err error // Set up each mapping function for this statement. aggregates := lm.selectStmt.FunctionCalls() lm.mapFuncs = make([]influxql.MapFunc, len(aggregates)) lm.fieldNames = make([]string, len(lm.mapFuncs)) for i, c := range aggregates { lm.mapFuncs[i], err = influxql.InitializeMapFunc(c) if err != nil { return err } // Check for calls like `derivative(lmean(value), 1d)` var nested *influxql.Call = c if fn, ok := c.Args[0].(*influxql.Call); ok { nested = fn } switch lit := nested.Args[0].(type) { case *influxql.VarRef: lm.fieldNames[i] = lit.Val case *influxql.Distinct: if c.Name != "count" { return fmt.Errorf("aggregate call didn't contain a field %s", c.String()) } lm.fieldNames[i] = lit.Val default: return fmt.Errorf("aggregate call didn't contain a field %s", c.String()) } } return nil }
// Begin will set up the mapper to run the map function for a given aggregate call starting at the passed in time func (l *LocalMapper) Begin(c *influxql.Call, startingTime int64) error { // set up the buffers. These ensure that we return data in time order mapFunc, err := influxql.InitializeMapFunc(c) if err != nil { return err } l.mapFunc = mapFunc l.keyBuffer = make([]int64, len(l.cursors)) l.valueBuffer = make([][]byte, len(l.cursors)) l.tmin = startingTime // determine if this is a raw data query with a single field, multiple fields, or an aggregate var fieldName string if c == nil { // its a raw data query l.isRaw = true if len(l.selectFields) == 1 { fieldName = l.selectFields[0].Name } } else { lit, ok := c.Args[0].(*influxql.VarRef) if !ok { return fmt.Errorf("aggregate call didn't contain a field %s", c.String()) } fieldName = lit.Val } // set up the field info if a specific field was set for this mapper if fieldName != "" { f := l.decoder.FieldByName(fieldName) if f == nil { return fmt.Errorf("%s isn't a field on measurement %s", fieldName, l.job.MeasurementName) } l.fieldID = f.ID l.fieldName = f.Name } // seek the bolt cursors and fill the buffers for i, c := range l.cursors { // this series may have never been written in this shard group (time range) so the cursor would be nil if c == nil { l.keyBuffer[i] = 0 l.valueBuffer[i] = nil continue } k, v := c.Seek(u64tob(uint64(l.job.TMin))) if k == nil { l.keyBuffer[i] = 0 l.valueBuffer[i] = nil continue } l.cursorsEmpty = false t := int64(btou64(k)) l.keyBuffer[i] = t l.valueBuffer[i] = v } return nil }
// Begin will set up the mapper to run the map function for a given aggregate call starting at the passed in time func (l *LocalMapper) Begin(c *influxql.Call, startingTime int64, chunkSize int) error { // set up the buffers. These ensure that we return data in time order mapFunc, err := influxql.InitializeMapFunc(c) if err != nil { return err } l.mapFunc = mapFunc l.keyBuffer = make([]int64, len(l.cursors)) l.valueBuffer = make([][]byte, len(l.cursors)) l.chunkSize = chunkSize l.tmin = startingTime var isCountDistinct bool // determine if this is a raw data query with a single field, multiple fields, or an aggregate var fieldName string if c == nil { // its a raw data query l.isRaw = true if len(l.selectFields) == 1 { fieldName = l.selectFields[0] } // if they haven't set a limit, just set it to the max int size if l.limit == 0 { l.limit = math.MaxUint64 } } else { // Check for calls like `derivative(mean(value), 1d)` var nested *influxql.Call = c if fn, ok := c.Args[0].(*influxql.Call); ok { nested = fn } switch lit := nested.Args[0].(type) { case *influxql.VarRef: fieldName = lit.Val case *influxql.Distinct: if c.Name != "count" { return fmt.Errorf("aggregate call didn't contain a field %s", c.String()) } isCountDistinct = true fieldName = lit.Val default: return fmt.Errorf("aggregate call didn't contain a field %s", c.String()) } isCountDistinct = isCountDistinct || (c.Name == "count" && nested.Name == "distinct") } // set up the field info if a specific field was set for this mapper if fieldName != "" { fid, err := l.decoder.FieldIDByName(fieldName) if err != nil { switch { case c != nil && c.Name == "distinct": return fmt.Errorf(`%s isn't a field on measurement %s; to query the unique values for a tag use SHOW TAG VALUES FROM %[2]s WITH KEY = "%[1]s`, fieldName, l.job.MeasurementName) case isCountDistinct: return fmt.Errorf("%s isn't a field on measurement %s; count(distinct) on tags isn't yet supported", fieldName, l.job.MeasurementName) } } l.fieldID = fid 
l.fieldName = fieldName } // seek the bolt cursors and fill the buffers for i, c := range l.cursors { // this series may have never been written in this shard group (time range) so the cursor would be nil if c == nil { l.keyBuffer[i] = 0 l.valueBuffer[i] = nil continue } k, v := c.Seek(u64tob(uint64(l.job.TMin))) if k == nil { l.keyBuffer[i] = 0 l.valueBuffer[i] = nil continue } l.cursorsEmpty = false t := int64(btou64(k)) l.keyBuffer[i] = t l.valueBuffer[i] = v } return nil }
// Open opens the aggregate mapper: it starts a read-only transaction on the
// shard, resolves map functions and field names for each aggregate call,
// computes the GROUP BY time windowing parameters, and builds the tag-set
// cursors used to read series data. Returns an error if the statement is
// invalid or the transaction cannot be started.
func (am *AggMapper) Open() error {
	var err error

	// Get a read-only transaction.
	tx, err := am.shard.DB().Begin(false)
	if err != nil {
		return err
	}
	am.tx = tx

	// Set up each mapping function for this statement.
	aggregates := am.stmt.FunctionCalls()
	am.mapFuncs = make([]influxql.MapFunc, len(aggregates))
	am.fieldNames = make([]string, len(am.mapFuncs))
	for i, c := range aggregates {
		am.mapFuncs[i], err = influxql.InitializeMapFunc(c)
		if err != nil {
			return err
		}

		// Check for calls like `derivative(mean(value), 1d)`: if the first
		// argument is itself a call, the field name is taken from that inner call.
		var nested *influxql.Call = c
		if fn, ok := c.Args[0].(*influxql.Call); ok {
			nested = fn
		}
		switch lit := nested.Args[0].(type) {
		case *influxql.VarRef:
			am.fieldNames[i] = lit.Val
		case *influxql.Distinct:
			// A bare DISTINCT argument is only accepted inside count().
			if c.Name != "count" {
				return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
			}
			am.fieldNames[i] = lit.Val
		default:
			return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
		}
	}

	// Set all time-related parameters on the mapper.
	am.queryTMin, am.queryTMax = influxql.TimeRangeAsEpochNano(am.stmt.Condition)

	// For GROUP BY time queries, limit the number of data points returned by the limit and offset
	d, err := am.stmt.GroupByInterval()
	if err != nil {
		return err
	}
	am.intervalSize = d.Nanoseconds()
	if am.queryTMin == 0 || am.intervalSize == 0 {
		// No lower time bound or no GROUP BY time interval: the whole query
		// range is treated as a single interval.
		am.numIntervals = 1
		am.intervalSize = am.queryTMax - am.queryTMin
	} else {
		// Round the query bounds outward to interval boundaries and count how
		// many whole intervals fit between them (integer division truncation
		// performs the rounding).
		intervalTop := am.queryTMax/am.intervalSize*am.intervalSize + am.intervalSize
		intervalBottom := am.queryTMin / am.intervalSize * am.intervalSize
		am.numIntervals = int((intervalTop - intervalBottom) / am.intervalSize)
	}

	if am.stmt.Limit > 0 || am.stmt.Offset > 0 {
		// ensure that the offset isn't higher than the number of points we'd get
		if am.stmt.Offset > am.numIntervals {
			return nil
		}

		// Take the lesser of either the pre computed number of GROUP BY buckets that
		// will be in the result or the limit passed in by the user
		if am.stmt.Limit < am.numIntervals {
			am.numIntervals = am.stmt.Limit
		}
	}

	// If we are exceeding our MaxGroupByPoints error out
	if am.numIntervals > MaxGroupByPoints {
		return errors.New("too many points in the group by interval. maybe you forgot to specify a where time clause?")
	}

	// Ensure that the start time for the results is on the start of the window.
	am.queryTMinWindow = am.queryTMin
	if am.intervalSize > 0 && am.numIntervals > 1 {
		am.queryTMinWindow = am.queryTMinWindow / am.intervalSize * am.intervalSize
	}

	// Create the TagSet cursors for the Mapper.
	for _, src := range am.stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return fmt.Errorf("invalid source type: %#v", src)
		}

		m := am.shard.index.Measurement(mm.Name)
		if m == nil {
			// This shard has never received data for the measurement. No Mapper
			// required.
			return nil
		}

		// Create tagset cursors and determine various field types within SELECT statement.
		tsf, err := createTagSetsAndFields(m, am.stmt)
		if err != nil {
			return err
		}
		tagSets := tsf.tagSets
		am.selectFields = tsf.selectFields
		am.selectTags = tsf.selectTags
		am.whereFields = tsf.whereFields

		// Validate that group by is not a field
		if err := m.ValidateGroupBy(am.stmt); err != nil {
			return err
		}

		// SLIMIT and SOFFSET the unique series: drop everything when the
		// offset is past the end, otherwise clamp the limit and slice.
		if am.stmt.SLimit > 0 || am.stmt.SOffset > 0 {
			if am.stmt.SOffset > len(tagSets) {
				tagSets = nil
			} else {
				if am.stmt.SOffset+am.stmt.SLimit > len(tagSets) {
					am.stmt.SLimit = len(tagSets) - am.stmt.SOffset
				}
				tagSets = tagSets[am.stmt.SOffset : am.stmt.SOffset+am.stmt.SLimit]
			}
		}

		// Create all cursors for reading the data from this shard.
		for _, t := range tagSets {
			cursors := []*seriesCursor{}

			for i, key := range t.SeriesKeys {
				c := createCursorForSeries(am.tx, am.shard, key)
				if c == nil {
					// No data exists for this key.
					continue
				}
				cm := newSeriesCursor(c, t.Filters[i])
				cursors = append(cursors, cm)
			}

			tsc := newTagSetCursor(m.Name, t.Tags, cursors, am.shard.FieldCodec(m.Name))
			am.cursors = append(am.cursors, tsc)
		}

		// NOTE(review): cursors accumulated so far (across sources) are re-sorted
		// on every source iteration — presumably to keep output ordering stable;
		// confirm against the reducer's expectations.
		sort.Sort(tagSetCursors(am.cursors))
	}

	return nil
}