// Begin will set up the mapper to run the map function for a given aggregate call starting at the passed in time
func (l *LocalMapper) Begin(c *influxql.Call, startingTime int64) error {
    // set up the buffers. These ensure that we return data in time order
    mapFunc, err := influxql.InitializeMapFunc(c)
    if err != nil {
        return err
    }
    l.mapFunc = mapFunc
    l.keyBuffer = make([]int64, len(l.cursors))
    l.valueBuffer = make([][]byte, len(l.cursors))
    l.tmin = startingTime

    // determine if this is a raw data query with a single field, multiple fields, or an aggregate
    var fieldName string
    if c == nil { // it's a raw data query
        l.isRaw = true
        if len(l.selectFields) == 1 {
            fieldName = l.selectFields[0].Name
        }
    } else {
        lit, ok := c.Args[0].(*influxql.VarRef)
        if !ok {
            return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
        }
        fieldName = lit.Val
    }

    // set up the field info if a specific field was set for this mapper
    if fieldName != "" {
        f := l.decoder.FieldByName(fieldName)
        if f == nil {
            return fmt.Errorf("%s isn't a field on measurement %s", fieldName, l.job.MeasurementName)
        }
        l.fieldID = f.ID
        l.fieldName = f.Name
    }

    // seek the bolt cursors and fill the buffers
    for i, c := range l.cursors {
        // this series may have never been written in this shard group (time range) so the cursor would be nil
        if c == nil {
            l.keyBuffer[i] = 0
            l.valueBuffer[i] = nil
            continue
        }
        k, v := c.Seek(u64tob(uint64(l.job.TMin)))
        if k == nil {
            l.keyBuffer[i] = 0
            l.valueBuffer[i] = nil
            continue
        }
        l.cursorsEmpty = false
        t := int64(btou64(k))
        l.keyBuffer[i] = t
        l.valueBuffer[i] = v
    }
    return nil
}
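The two buffers primed here exist so the mapper can later merge many series cursors into a single time-ordered stream: each slot holds the next decoded timestamp and raw value for one cursor. Below is a minimal sketch of the selection step that kind of merge implies, assuming (as the buffer-filling code above suggests) that a key of 0 marks an exhausted or never-written cursor; nextCursor is a hypothetical helper, not part of the mapper.

package main

import "fmt"

// nextCursor is a hypothetical reconstruction of the selection step in a
// time-ordered merge over primed cursor buffers: scan the buffered keys and
// pick the cursor holding the smallest timestamp. A key of 0 marks an
// exhausted cursor, matching how Begin fills the buffers above.
func nextCursor(keyBuffer []int64) int {
    min := int64(-1)
    idx := -1
    for i, k := range keyBuffer {
        if k == 0 {
            continue // exhausted or never-written cursor
        }
        if idx == -1 || k < min {
            min, idx = k, i
        }
    }
    return idx // -1 when every cursor is drained
}

func main() {
    keys := []int64{30, 10, 0, 20}
    fmt.Println(nextCursor(keys)) // 1: the cursor buffering t=10 is emitted first
}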
// initializeMapFunc takes an aggregate call from the query and returns the mapFunc
func initializeMapFunc(c *influxql.Call) (mapFunc, error) {
    // see if it's a query for raw data
    if c == nil {
        return MapRawQuery, nil
    }

    // Retrieve map function by name.
    switch c.Name {
    case "count":
        if _, ok := c.Args[0].(*influxql.Distinct); ok {
            return MapCountDistinct, nil
        }
        if c, ok := c.Args[0].(*influxql.Call); ok {
            if c.Name == "distinct" {
                return MapCountDistinct, nil
            }
        }
        return MapCount, nil
    case "distinct":
        return MapDistinct, nil
    case "sum":
        return MapSum, nil
    case "mean":
        return MapMean, nil
    case "median":
        // median reuses the stddev map fn: both only need the raw values
        // collected per interval; the reduce fn does the real work
        return MapStddev, nil
    case "min":
        return func(input *MapInput) interface{} {
            return MapMin(input, c.Fields()[0])
        }, nil
    case "max":
        return func(input *MapInput) interface{} {
            return MapMax(input, c.Fields()[0])
        }, nil
    case "spread":
        return MapSpread, nil
    case "stddev":
        return MapStddev, nil
    case "first":
        return func(input *MapInput) interface{} {
            return MapFirst(input, c.Fields()[0])
        }, nil
    case "last":
        return func(input *MapInput) interface{} {
            return MapLast(input, c.Fields()[0])
        }, nil
    case "top", "bottom":
        // Capture information from the call that the Map function will require
        lit, _ := c.Args[len(c.Args)-1].(*influxql.NumberLiteral)
        limit := int(lit.Val)
        fields := topCallArgs(c)

        return func(input *MapInput) interface{} {
            return MapTopBottom(input, limit, fields, len(c.Args), c.Name)
        }, nil
    case "percentile":
        return MapEcho, nil
    case "derivative", "non_negative_derivative":
        // If the arg is another aggregate e.g. derivative(mean(value)), then
        // use the map func for that nested aggregate
        if fn, ok := c.Args[0].(*influxql.Call); ok {
            return initializeMapFunc(fn)
        }
        return MapRawQuery, nil
    default:
        return nil, fmt.Errorf("function not found: %q", c.Name)
    }
}
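Everything in the switch is normalized to one mapFunc signature: context-free aggregates like sum are returned directly, while min, max, first, last, top, and bottom close over arguments extracted from the call, exactly as the top/bottom case captures limit and fields. Here is a standalone sketch of that closure-capture dispatch pattern, with invented names (mapFn, mapSum, initMapFn) standing in for the package's real types.

package main

import (
    "fmt"
    "sort"
)

// mapFn is a stand-in for the package's mapFunc type: every aggregate,
// parameterized or not, is reduced to this one signature.
type mapFn func(values []float64) interface{}

func mapSum(values []float64) interface{} {
    var sum float64
    for _, v := range values {
        sum += v
    }
    return sum
}

// initMapFn mirrors the dispatch above: "sum" needs no call context and is
// returned directly; "top" captures its limit in a closure.
func initMapFn(name string, limit int) (mapFn, error) {
    switch name {
    case "sum":
        return mapSum, nil
    case "top":
        return func(values []float64) interface{} {
            sort.Sort(sort.Reverse(sort.Float64Slice(values)))
            if len(values) > limit {
                values = values[:limit]
            }
            return values
        }, nil
    default:
        return nil, fmt.Errorf("function not found: %q", name)
    }
}

func main() {
    fn, _ := initMapFn("top", 2)
    fmt.Println(fn([]float64{3, 1, 4, 1, 5})) // [5 4]
}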
// Begin will set up the mapper to run the map function for a given aggregate call starting at the passed in time
func (l *LocalMapper) Begin(c *influxql.Call, startingTime int64, chunkSize int) error {
    // set up the buffers. These ensure that we return data in time order
    mapFunc, err := influxql.InitializeMapFunc(c)
    if err != nil {
        return err
    }
    l.mapFunc = mapFunc
    l.keyBuffer = make([]int64, len(l.cursors))
    l.valueBuffer = make([][]byte, len(l.cursors))
    l.chunkSize = chunkSize
    l.tmin = startingTime

    var isCountDistinct bool

    // determine if this is a raw data query with a single field, multiple fields, or an aggregate
    var fieldName string
    if c == nil { // it's a raw data query
        l.isRaw = true
        if len(l.selectFields) == 1 {
            fieldName = l.selectFields[0]
        }

        // if they haven't set a limit, just set it to the max int size
        if l.limit == 0 {
            l.limit = math.MaxUint64
        }
    } else {
        // Check for calls like `derivative(mean(value), 1d)`
        var nested *influxql.Call = c
        if fn, ok := c.Args[0].(*influxql.Call); ok {
            nested = fn
        }

        switch lit := nested.Args[0].(type) {
        case *influxql.VarRef:
            fieldName = lit.Val
        case *influxql.Distinct:
            if c.Name != "count" {
                return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
            }
            isCountDistinct = true
            fieldName = lit.Val
        default:
            return fmt.Errorf("aggregate call didn't contain a field %s", c.String())
        }

        isCountDistinct = isCountDistinct || (c.Name == "count" && nested.Name == "distinct")
    }

    // set up the field info if a specific field was set for this mapper
    if fieldName != "" {
        fid, err := l.decoder.FieldIDByName(fieldName)
        if err != nil {
            switch {
            case c != nil && c.Name == "distinct":
                return fmt.Errorf(`%s isn't a field on measurement %s; to query the unique values for a tag use SHOW TAG VALUES FROM %[2]s WITH KEY = "%[1]s"`, fieldName, l.job.MeasurementName)
            case isCountDistinct:
                return fmt.Errorf("%s isn't a field on measurement %s; count(distinct) on tags isn't yet supported", fieldName, l.job.MeasurementName)
            }
        }
        l.fieldID = fid
        l.fieldName = fieldName
    }

    // seek the bolt cursors and fill the buffers
    for i, c := range l.cursors {
        // this series may have never been written in this shard group (time range) so the cursor would be nil
        if c == nil {
            l.keyBuffer[i] = 0
            l.valueBuffer[i] = nil
            continue
        }
        k, v := c.Seek(u64tob(uint64(l.job.TMin)))
        if k == nil {
            l.keyBuffer[i] = 0
            l.valueBuffer[i] = nil
            continue
        }
        l.cursorsEmpty = false
        t := int64(btou64(k))
        l.keyBuffer[i] = t
        l.valueBuffer[i] = v
    }
    return nil
}
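The cursor seek goes through u64tob(uint64(l.job.TMin)), and keys come back through btou64. Those helpers aren't shown in this excerpt; they are presumably thin big-endian wrappers over encoding/binary, since BoltDB compares keys bytewise and big-endian is the byte order that makes lexicographic comparison agree with numeric timestamp order. A sketch along those lines:

package main

import (
    "encoding/binary"
    "fmt"
)

// u64tob encodes v as an 8-byte big-endian key. Big-endian matters: BoltDB
// sorts keys lexicographically by byte, and big-endian encoding is the one
// that makes that ordering agree with numeric ordering.
func u64tob(v uint64) []byte {
    b := make([]byte, 8)
    binary.BigEndian.PutUint64(b, v)
    return b
}

// btou64 decodes an 8-byte big-endian key back into a uint64.
func btou64(b []byte) uint64 {
    return binary.BigEndian.Uint64(b)
}

func main() {
    k := u64tob(1000)
    fmt.Printf("% x -> %d\n", k, btou64(k)) // 00 00 00 00 00 00 03 e8 -> 1000
}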
// MapTop emits the top data points for each group by interval
func MapTop(itr iterator, c *influxql.Call) interface{} {
    // Capture the limit if it was specified in the call
    lit, _ := c.Args[len(c.Args)-1].(*influxql.NumberLiteral)
    limit := int64(lit.Val)

    // Simple case where only value and limit are specified.
    if len(c.Args) == 2 {
        out := positionOut{callArgs: topCallArgs(c)}

        for k, v := itr.Next(); k != -1; k, v = itr.Next() {
            t := k
            if bt := itr.TMin(); bt > -1 {
                t = bt
            }
            out.points = append(out.points, PositionPoint{t, v, itr.Tags()})
        }

        // If we have more than we asked for, only send back the top values
        if int64(len(out.points)) > limit {
            sort.Sort(topMapOut{out})
            out.points = out.points[:limit]
        }
        if len(out.points) > 0 {
            return out.points
        }
        return nil
    }

    // They specified tags in the call to get unique sets, so we need to map them as we accumulate them
    outMap := make(map[string]positionOut)

    mapKey := func(args []string, fields map[string]interface{}, keys map[string]string) string {
        key := ""
        for _, a := range args {
            if v, ok := fields[a]; ok {
                key += a + ":" + fmt.Sprintf("%v", v) + ","
                continue
            }
            if v, ok := keys[a]; ok {
                key += a + ":" + v + ","
                continue
            }
        }
        return key
    }

    for k, v := itr.Next(); k != -1; k, v = itr.Next() {
        t := k
        if bt := itr.TMin(); bt > -1 {
            t = bt
        }
        callArgs := c.Fields()
        tags := itr.Tags()
        // TODO in the future we need to send in fields as well
        // this will allow a user to query on both fields and tags
        // fields will take the priority over tags if there is a name collision
        key := mapKey(callArgs, nil, tags)
        if out, ok := outMap[key]; ok {
            out.points = append(out.points, PositionPoint{t, v, itr.Tags()})
            outMap[key] = out
        } else {
            out = positionOut{callArgs: topCallArgs(c)}
            out.points = append(out.points, PositionPoint{t, v, itr.Tags()})
            outMap[key] = out
        }
    }

    // Sort all the maps
    for k, v := range outMap {
        sort.Sort(topMapOut{v})
        outMap[k] = v
    }

    slice := func(needed int64, m map[string]positionOut) PositionPoints {
        points := PositionPoints{}
        var collected int64
        for k, v := range m {
            if len(v.points) > 0 {
                points = append(points, v.points[0])
                v.points = v.points[1:]
                m[k] = v
                collected++
            }
        }
        o := positionOut{callArgs: topCallArgs(c), points: points}
        sort.Sort(topMapOut{o})
        points = o.points
        // If we collected more than we needed, only return the top
        if collected > needed {
            points = o.points[:needed]
        }
        return points
    }

    points := PositionPoints{}
    var collected int64
    for collected < limit {
        p := slice(limit-collected, outMap)
        if len(p) == 0 {
            break
        }
        points = append(points, p...)
        collected += int64(len(p))
    }
    if len(points) > 0 {
        return points
    }
    return nil
}
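mapKey is how points from the same tag set land in the same positionOut bucket: it concatenates name:value pairs for each requested argument, letting fields shadow tags on a name collision. The same construction lifted into a runnable form, with made-up tag data:

package main

import "fmt"

// mapKey reproduces the closure above: for each requested arg, emit
// "name:value," — fields win over tags on a name collision, and args
// missing from both maps are simply skipped.
func mapKey(args []string, fields map[string]interface{}, keys map[string]string) string {
    key := ""
    for _, a := range args {
        if v, ok := fields[a]; ok {
            key += a + ":" + fmt.Sprintf("%v", v) + ","
            continue
        }
        if v, ok := keys[a]; ok {
            key += a + ":" + v + ","
            continue
        }
    }
    return key
}

func main() {
    tags := map[string]string{"host": "serverA", "region": "west"}
    // points sharing the same host/region pair accumulate in one bucket
    fmt.Println(mapKey([]string{"host", "region"}, nil, tags)) // host:serverA,region:west,
}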
// MapTop emits the top data points for each group by interval
func MapTop(itr iterator, c *influxql.Call) interface{} {
    // Capture the limit if it was specified in the call
    lit, _ := c.Args[len(c.Args)-1].(*influxql.NumberLiteral)
    limit := int(lit.Val)

    out := positionOut{callArgs: topCallArgs(c)}
    out.points = make([]PositionPoint, 0, limit)
    minheap := topMapOut{&out}
    tagmap := make(map[string]PositionPoint)

    // buffer so we don't allocate every time through
    var pp PositionPoint
    if len(c.Args) > 2 {
        // this is a tag aggregating query.
        // For each unique permutation of the tags given,
        // select the max and then fall through to select top of those
        // points
        for k, v := itr.Next(); k != -1; k, v = itr.Next() {
            pp = PositionPoint{k, v, itr.Tags()}
            callArgs := c.Fields()
            tags := itr.Tags()
            // TODO in the future we need to send in fields as well
            // this will allow a user to query on both fields and tags
            // fields will take the priority over tags if there is a name collision
            key := tagkeytop(callArgs, nil, tags)
            p, ok := tagmap[key]
            if !ok || minheap.positionPointLess(&p, &pp) {
                tagmap[key] = pp
            }
        }
        itr = &mapIter{
            m:    tagmap,
            tmin: itr.TMin(),
        }
    }

    for k, v := itr.Next(); k != -1; k, v = itr.Next() {
        t := k
        if bt := itr.TMin(); bt > -1 {
            t = bt
        }
        if len(out.points) < limit {
            out.points = append(out.points, PositionPoint{t, v, itr.Tags()})
            if len(out.points) == limit {
                heap.Init(&minheap)
            }
        } else {
            // we're over the limit, so find out if we're bigger than the
            // smallest point in the set and eject it if we are
            minval := &out.points[0]
            pp = PositionPoint{t, v, itr.Tags()}
            if minheap.positionPointLess(minval, &pp) {
                minheap.insert(pp)
            }
        }
    }

    // should only happen on empty iterator.
    if len(out.points) == 0 {
        return nil
    } else if len(out.points) < limit {
        // it would be as fast to just sort regularly here,
        // but falling down to the heapsort will mean we can get
        // rid of another sort order.
        heap.Init(&minheap)
    }

    // minheap should now contain the largest values that were encountered
    // during iteration.
    //
    // we want these values in ascending sorted order. We can achieve this by iteratively
    // removing the lowest element and putting it at the end of the array. This is analogous
    // to a heap sort.
    //
    // computer science is fun!
    result := out.points
    for len(out.points) > 0 {
        p := out.points[0]
        heap.Pop(&minheap)
        // reslice so that we can get to the element just after the heap
        endslice := out.points[:len(out.points)+1]
        endslice[len(endslice)-1] = p
    }

    // the ascending order is now in the result slice
    return result
}
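The closing loop is the in-place phase of a heapsort: each iteration remembers the heap's root, pops it (which shrinks the heap by one), and parks it in the slot the heap just vacated, so the full backing slice captured in result ends up sorted in reverse heap order. Below is a self-contained illustration of that pop-and-park trick over a plain int min-heap; intHeap is invented for the sketch, and with a simple less-than comparator the popped minimums accumulate from the back, leaving the array largest-first (the order MapTop ends with depends on how topMapOut's comparator is defined).

package main

import (
    "container/heap"
    "fmt"
)

// intHeap is a minimal container/heap min-heap over ints.
type intHeap []int

func (h intHeap) Len() int            { return len(h) }
func (h intHeap) Less(i, j int) bool  { return h[i] < h[j] }
func (h intHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *intHeap) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *intHeap) Pop() interface{} {
    old := *h
    n := len(old)
    x := old[n-1]
    *h = old[:n-1]
    return x
}

func main() {
    h := intHeap{5, 1, 4, 2, 3}
    heap.Init(&h)

    // Same in-place pattern as MapTop: capture the full slice header, then
    // pop the root into the slot just past the shrinking heap until empty.
    result := h
    for len(h) > 0 {
        min := h[0]
        heap.Pop(&h) // heap shrinks by one; its old last slot is now free
        end := h[:len(h)+1]
        end[len(end)-1] = min
    }
    fmt.Println(result) // [5 4 3 2 1]: popped minimums fill in from the back
}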