// filters walks the where clause of a select statement and returns a map with all series ids // matching the where clause and any filter expression that should be applied to each func (m *Measurement) filters(stmt *influxql.SelectStatement) (map[uint64]influxql.Expr, error) { if stmt.Condition == nil || stmt.OnlyTimeDimensions() { seriesIdsToExpr := make(map[uint64]influxql.Expr) for _, id := range m.seriesIDs { seriesIdsToExpr[id] = nil } return seriesIdsToExpr, nil } ids, seriesIdsToExpr, err := m.walkWhereForSeriesIds(stmt.Condition) if err != nil { return nil, err } // Ensure every id is in the map and replace literal true expressions with // nil so the engine doesn't waste time evaluating them. for _, id := range ids { if expr, ok := seriesIdsToExpr[id]; !ok { seriesIdsToExpr[id] = nil } else if b, ok := expr.(*influxql.BooleanLiteral); ok && b.Val { seriesIdsToExpr[id] = nil } } return seriesIdsToExpr, nil }
// Plan creates an execution plan for the given SelectStatement and returns an Executor. func (q *QueryExecutor) plan(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) { shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers. // Replace instances of "now()" with the current time, and check the resultant times. stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: time.Now().UTC()}) tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = time.Now() } if tmin.IsZero() { tmin = time.Unix(0, 0) } for _, src := range stmt.Sources { mm, ok := src.(*influxql.Measurement) if !ok { return nil, fmt.Errorf("invalid source type: %#v", src) } // Build the set of target shards. Using shard IDs as keys ensures each shard ID // occurs only once. shardGroups, err := q.MetaStore.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax) if err != nil { return nil, err } for _, g := range shardGroups { for _, sh := range g.Shards { shards[sh.ID] = sh } } } // Build the Mappers, one per shard. mappers := []Mapper{} for _, sh := range shards { m, err := q.ShardMapper.CreateMapper(sh, stmt.String(), chunkSize) if err != nil { return nil, err } if m == nil { // No data for this shard, skip it. continue } mappers = append(mappers, m) } var executor Executor if len(mappers) > 0 { // All Mapper are of same type, so check first to determine correct Executor type. if _, ok := mappers[0].(*RawMapper); ok { executor = NewRawExecutor(stmt, mappers, chunkSize) } else { executor = NewAggregateExecutor(stmt, mappers) } } else { // With no mappers, the Executor type doesn't matter. executor = NewRawExecutor(stmt, nil, chunkSize) } return executor, nil }
// WhereFields returns a list of non-"time" fields in the WHERE section of stmt. func (m *Measurement) WhereFields(stmt *influxql.SelectStatement) []string { set := newStringSet() for _, name := range stmt.NamesInWhere() { if name != "time" && m.HasField(name) { set.add(name) } } return set.list() }
// SelectTags returns a list of non-field tags in the SELECT section of stmt. func (m *Measurement) SelectTags(stmt *influxql.SelectStatement) []string { set := newStringSet() for _, name := range stmt.NamesInSelect() { if !m.HasField(name) && m.HasTagKey(name) { set.add(name) } } return set.list() }
// SelectFields returns a list of fields in the SELECT section of stmt. func (m *Measurement) SelectFields(stmt *influxql.SelectStatement) []string { set := newStringSet() for _, name := range stmt.NamesInSelect() { if m.HasField(name) { set.add(name) continue } } return set.list() }
// NewLocalMapper returns a mapper for the given shard, which will return data for the SELECT statement. func NewLocalMapper(shard *Shard, stmt *influxql.SelectStatement, chunkSize int) *LocalMapper { m := &LocalMapper{ shard: shard, stmt: stmt, chunkSize: chunkSize, cursors: make([]*tagSetCursor, 0), } m.rawMode = (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative() return m }
// expandWildcards returns a new SelectStatement with wildcards in the fields // and/or GROUP BY exapnded with actual field names. func (q *QueryExecutor) expandWildcards(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) { // If there are no wildcards in the statement, return it as-is. if !stmt.HasWildcard() { return stmt, nil } // Use sets to avoid duplicate field names. fieldSet := map[string]struct{}{} dimensionSet := map[string]struct{}{} var fields influxql.Fields var dimensions influxql.Dimensions // Iterate measurements in the FROM clause getting the fields & dimensions for each. for _, src := range stmt.Sources { if m, ok := src.(*influxql.Measurement); ok { // Lookup the database. The database may not exist if no data for this database // was ever written to the shard. db := q.store.DatabaseIndex(m.Database) if db == nil { return stmt, nil } // Lookup the measurement in the database. mm := db.measurements[m.Name] if mm == nil { return nil, ErrMeasurementNotFound(m.String()) } // Get the fields for this measurement. for _, name := range mm.FieldNames() { if _, ok := fieldSet[name]; ok { continue } fieldSet[name] = struct{}{} fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: name}}) } // Get the dimensions for this measurement. for _, t := range mm.TagKeys() { if _, ok := dimensionSet[t]; ok { continue } dimensionSet[t] = struct{}{} dimensions = append(dimensions, &influxql.Dimension{Expr: &influxql.VarRef{Val: t}}) } } } // Return a new SelectStatement with the wild cards rewritten. return stmt.RewriteWildcards(fields, dimensions), nil }
func (s *Shard) ValidateAggregateFieldsInStatement(measurementName string, stmt *influxql.SelectStatement) error { s.mu.RLock() defer s.mu.RUnlock() validateType := func(aname, fname string, t influxql.DataType) error { if t != influxql.Float && t != influxql.Integer { return fmt.Errorf("aggregate '%s' requires numerical field values. Field '%s' is of type %s", aname, fname, t) } return nil } m := s.measurementFields[measurementName] if m == nil { return fmt.Errorf("measurement not found: %s", measurementName) } // If a numerical aggregate is requested, ensure it is only performed on numeric data or on a // nested aggregate on numeric data. for _, a := range stmt.FunctionCalls() { // Check for fields like `derivative(mean(value), 1d)` var nested *influxql.Call = a if fn, ok := nested.Args[0].(*influxql.Call); ok { nested = fn } switch lit := nested.Args[0].(type) { case *influxql.VarRef: if influxql.IsNumeric(nested) { f := m.Fields[lit.Val] if err := validateType(a.Name, f.Name, f.Type); err != nil { return err } } case *influxql.Distinct: if nested.Name != "count" { return fmt.Errorf("aggregate call didn't contain a field %s", a.String()) } if influxql.IsNumeric(nested) { f := m.Fields[lit.Val] if err := validateType(a.Name, f.Name, f.Type); err != nil { return err } } default: return fmt.Errorf("aggregate call didn't contain a field %s", a.String()) } } return nil }
// createTagSetsAndFields returns the tagsets and various fields given a measurement and // SELECT statement. It also ensures that the fields and tags exist. func createTagSetsAndFields(m *Measurement, stmt *influxql.SelectStatement) (*tagSetsAndFields, error) { _, tagKeys, err := stmt.Dimensions.Normalize() if err != nil { return nil, err } sfs := newStringSet() sts := newStringSet() wfs := newStringSet() // Validate the fields and tags asked for exist and keep track of which are in the select vs the where for _, n := range stmt.NamesInSelect() { if m.HasField(n) { sfs.add(n) continue } if !m.HasTagKey(n) { return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n) } sts.add(n) tagKeys = append(tagKeys, n) } for _, n := range stmt.NamesInWhere() { if n == "time" { continue } if m.HasField(n) { wfs.add(n) continue } if !m.HasTagKey(n) { return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n) } } // Get the sorted unique tag sets for this statement. tagSets, err := m.TagSets(stmt, tagKeys) if err != nil { return nil, err } return &tagSetsAndFields{ tagSets: tagSets, selectFields: sfs.list(), selectTags: sts.list(), whereFields: wfs.list(), }, nil }
// rewriteSelectStatement performs any necessary query re-writing. func (lm *SelectMapper) rewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) { var err error // Expand regex expressions in the FROM clause. sources, err := expandSources(stmt.Sources, lm.shard.index) if err != nil { return nil, err } stmt.Sources = sources // Expand wildcards in the fields or GROUP BY. stmt, err = lm.expandWildcards(stmt) if err != nil { return nil, err } stmt.RewriteDistinct() return stmt, nil }
// DimensionTagSets returns list of tag sets from the GROUP BY section of stmt. func (m *Measurement) DimensionTagSets(stmt *influxql.SelectStatement) ([]*influxql.TagSet, error) { _, tagKeys := stmt.Dimensions.Normalize() for _, n := range stmt.NamesInDimension() { if m.HasTagKey(n) { tagKeys = append(tagKeys, n) } } // Get the sorted unique tag sets for this statement. tagSets, err := m.TagSets(stmt, tagKeys) if err != nil { return nil, err } return tagSets, nil }
// RewriteSelectStatement performs any necessary query re-writing. func (db *DatabaseIndex) RewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) { // Expand regex expressions in the FROM clause. sources, err := db.ExpandSources(stmt.Sources) if err != nil { return nil, err } stmt.Sources = sources // Expand wildcards in the fields or GROUP BY. stmt, err = db.ExpandWildcards(stmt) if err != nil { return nil, err } stmt.RewriteDistinct() return stmt, nil }
// filters walks the where clause of a select statement and returns a map with all series ids // matching the where clause and any filter expression that should be applied to each func (m *Measurement) filters(stmt *influxql.SelectStatement) map[uint32]influxql.Expr { seriesIdsToExpr := make(map[uint32]influxql.Expr) if stmt.Condition == nil || stmt.OnlyTimeDimensions() { for _, id := range m.seriesIDs { seriesIdsToExpr[id] = nil } return seriesIdsToExpr } ids, _, _ := m.walkWhereForSeriesIds(stmt.Condition, seriesIdsToExpr) // ensure every id is in the map for _, id := range ids { if _, ok := seriesIdsToExpr[id]; !ok { seriesIdsToExpr[id] = nil } } return seriesIdsToExpr }
// expandWildcards returns a new SelectStatement with wildcards in the fields // and/or GROUP BY expanded with actual field names. func (lm *LocalMapper) expandWildcards(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) { // If there are no wildcards in the statement, return it as-is. if !stmt.HasWildcard() { return stmt, nil } // Use sets to avoid duplicate field names. fieldSet := map[string]struct{}{} dimensionSet := map[string]struct{}{} var fields influxql.Fields var dimensions influxql.Dimensions // Iterate measurements in the FROM clause getting the fields & dimensions for each. for _, src := range stmt.Sources { if m, ok := src.(*influxql.Measurement); ok { // Lookup the measurement in the database. mm := lm.shard.index.Measurement(m.Name) if mm == nil { // This shard have never received data for the measurement. No Mapper // required. return stmt, nil } // Get the fields for this measurement. for _, name := range mm.FieldNames() { if _, ok := fieldSet[name]; ok { continue } fieldSet[name] = struct{}{} fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: name}}) } // Get the dimensions for this measurement. for _, t := range mm.TagKeys() { if _, ok := dimensionSet[t]; ok { continue } dimensionSet[t] = struct{}{} dimensions = append(dimensions, &influxql.Dimension{Expr: &influxql.VarRef{Val: t}}) } } } // Return a new SelectStatement with the wild cards rewritten. return stmt.RewriteWildcards(fields, dimensions), nil }
// Plan creates an execution plan for the given SelectStatement and returns an Executor. func (q *QueryExecutor) PlanSelect(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) { shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers. // It is important to "stamp" this time so that everywhere we evaluate `now()` in the statement is EXACTLY the same `now` now := time.Now().UTC() // Replace instances of "now()" with the current time, and check the resultant times. stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: now}) tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = now } if tmin.IsZero() { tmin = time.Unix(0, 0) } for _, src := range stmt.Sources { mm, ok := src.(*influxql.Measurement) if !ok { return nil, fmt.Errorf("invalid source type: %#v", src) } // Build the set of target shards. Using shard IDs as keys ensures each shard ID // occurs only once. shardGroups, err := q.MetaStore.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax) if err != nil { return nil, err } for _, g := range shardGroups { for _, sh := range g.Shards { shards[sh.ID] = sh } } } // Build the Mappers, one per shard. mappers := []Mapper{} for _, sh := range shards { m, err := q.ShardMapper.CreateMapper(sh, stmt, chunkSize) if err != nil { return nil, err } if m == nil { // No data for this shard, skip it. continue } mappers = append(mappers, m) } executor := NewSelectExecutor(stmt, mappers, chunkSize) return executor, nil }
// rewriteSelectStatement performs any necessary query re-writing. func (q *QueryExecutor) rewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) { var err error // Expand regex expressions in the FROM clause. sources, err := q.expandSources(stmt.Sources) if err != nil { return nil, err } stmt.Sources = sources // Expand wildcards in the fields or GROUP BY. if stmt.HasWildcard() { stmt, err = q.expandWildcards(stmt) if err != nil { return nil, err } } stmt.RewriteDistinct() return stmt, nil }
// derivativeInterval returns the time interval for the one (and only) derivative func func derivativeInterval(stmt *influxql.SelectStatement) (time.Duration, error) { if len(stmt.FunctionCalls()[0].Args) == 2 { return stmt.FunctionCalls()[0].Args[1].(*influxql.DurationLiteral).Val, nil } interval, err := stmt.GroupByInterval() if err != nil { return 0, err } if interval > 0 { return interval, nil } return time.Second, nil }
// CreateMapReduceJobs builds the MapReduceJobs, and the mappers each job has
// to run for its map phase, for every source in stmt.
//
// For each measurement source it looks up the retention policy and the
// measurement, validates the names used in SELECT, WHERE and GROUP BY,
// selects the shard groups overlapping the statement's time range, and then
// creates one job per tag set with one LocalMapper per usable shard. The jobs
// are returned sorted.
//
// tagKeys is extended with every tag key found in the SELECT clause; because
// it is appended to inside the source loop, keys added for one source are
// still present when the next source is processed.
//
// As a side effect, tx.measurement is set to the measurement of the source
// being processed.
func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) {
	jobs := []*influxql.MapReduceJob{}
	for _, src := range stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return nil, fmt.Errorf("invalid source type: %#v", src)
		}

		// Get the retention policy and the measurement for this source.
		rp, err := tx.meta.RetentionPolicy(mm.Database, mm.RetentionPolicy)
		if err != nil {
			return nil, err
		}
		m := tx.store.Measurement(mm.Database, mm.Name)
		if m == nil {
			return nil, ErrMeasurementNotFound(influxql.QuoteIdent([]string{mm.Database, "", mm.Name}...))
		}

		tx.measurement = m

		// Validate that the fields and tags asked for exist, and keep track of
		// which are in the SELECT vs. the WHERE clause.
		var selectFields []string
		var whereFields []string
		var selectTags []string

		for _, n := range stmt.NamesInSelect() {
			if m.HasField(n) {
				selectFields = append(selectFields, n)
				continue
			}
			if !m.HasTagKey(n) {
				return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
			}
			selectTags = append(selectTags, n)
			tagKeys = append(tagKeys, n)
		}
		for _, n := range stmt.NamesInWhere() {
			if n == "time" {
				continue
			}
			if m.HasField(n) {
				whereFields = append(whereFields, n)
				continue
			}
			if !m.HasTagKey(n) {
				return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
			}
		}

		if len(selectFields) == 0 && len(stmt.FunctionCalls()) == 0 {
			return nil, fmt.Errorf("select statement must include at least one field or function call")
		}

		// Validate that GROUP BY references are tag keys, not fields.
		for _, d := range stmt.Dimensions {
			switch e := d.Expr.(type) {
			case *influxql.VarRef:
				if !m.HasTagKey(e.Val) {
					return nil, fmt.Errorf("can not use field in group by clause: %s", e.Val)
				}
			}
		}

		// Grab the time range from the statement, defaulting the upper bound
		// to the transaction's time and the lower bound to the Unix epoch.
		tmin, tmax := influxql.TimeRange(stmt.Condition)
		if tmax.IsZero() {
			tmax = tx.now
		}
		if tmin.IsZero() {
			tmin = time.Unix(0, 0)
		}

		// Find shard groups within the time range.
		var shardGroups []*meta.ShardGroupInfo
		for _, group := range rp.ShardGroups {
			if group.Overlaps(tmin, tmax) {
				// Copy the loop variable before taking its address so each
				// entry points at its own value.
				g := group
				shardGroups = append(shardGroups, &g)
			}
		}
		// NOTE(review): this returns from the whole function, so jobs already
		// built for earlier sources are dropped when a later source has no
		// overlapping shard group — confirm this is intended.
		if len(shardGroups) == 0 {
			return nil, nil
		}

		// Get the GROUP BY interval, if there is one, in nanoseconds.
		var interval int64
		if d, err := stmt.GroupByInterval(); err != nil {
			return nil, err
		} else {
			interval = d.Nanoseconds()
		}

		// Get the tag sets for this query.
		tagSets, err := m.TagSets(stmt, tagKeys)
		if err != nil {
			return nil, err
		}

		for _, t := range tagSets {
			// Make a job for each tag set.
			job := &influxql.MapReduceJob{
				MeasurementName: m.Name,
				TagSet:          t,
				TMin:            tmin.UnixNano(),
				TMax:            tmax.UnixNano(),
			}

			// Make a mapper for each shard that must be hit. We may need to hit
			// multiple shards within a shard group.
			var mappers []influxql.Mapper

			// Create mappers for each shard we need to hit.
			for _, sg := range shardGroups {
				// TODO: implement distributed queries
				if len(sg.Shards) != 1 {
					return nil, fmt.Errorf("distributed queries aren't supported yet. You have a replication policy with RF < # of servers in cluster")
				}

				shard := tx.store.Shard(sg.Shards[0].ID)
				if shard == nil {
					// The store returned nil, which means we haven't written
					// any data into this shard yet, so ignore it.
					continue
				}

				// Get the codec for this measurement. If this is nil it just
				// means this measurement was never written into this shard, so
				// we can skip it and continue.
				codec := shard.FieldCodec(m.Name)
				if codec == nil {
					continue
				}

				var mapper influxql.Mapper

				mapper = &LocalMapper{
					seriesKeys:   t.SeriesKeys,
					shard:        shard,
					db:           shard.DB(),
					job:          job,
					decoder:      codec,
					filters:      t.Filters,
					whereFields:  whereFields,
					selectFields: selectFields,
					selectTags:   selectTags,
					tmin:         tmin.UnixNano(),
					tmax:         tmax.UnixNano(),
					interval:     interval,
					// Multiple mappers may need to be merged together to get
					// the results for a raw query, so each mapper will have to
					// read at least the limit plus the offset in data points
					// to ensure we've hit our mark.
					limit: uint64(stmt.Limit) + uint64(stmt.Offset),
				}

				mappers = append(mappers, mapper)
			}

			job.Mappers = mappers

			jobs = append(jobs, job)
		}
	}

	// Always return the jobs in sorted order so the results from running them
	// are returned in a deterministic order.
	sort.Sort(influxql.MapReduceJobs(jobs))

	return jobs, nil
}
// Plan creates an execution plan for the given SelectStatement and returns an Executor. func (q *QueryExecutor) PlanSelect(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) { var shardIDs []uint64 shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers. // It is important to "stamp" this time so that everywhere we evaluate `now()` in the statement is EXACTLY the same `now` now := time.Now().UTC() // Replace instances of "now()" with the current time, and check the resultant times. stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: now}) tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = now } if tmin.IsZero() { tmin = time.Unix(0, 0) } for _, src := range stmt.Sources { mm, ok := src.(*influxql.Measurement) if !ok { return nil, fmt.Errorf("invalid source type: %#v", src) } // Build the set of target shards. Using shard IDs as keys ensures each shard ID // occurs only once. shardGroups, err := q.MetaClient.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax) if err != nil { return nil, err } for _, g := range shardGroups { for _, sh := range g.Shards { if _, ok := shards[sh.ID]; !ok { shards[sh.ID] = sh shardIDs = append(shardIDs, sh.ID) } } } } // Sort shard IDs to make testing deterministic. sort.Sort(uint64Slice(shardIDs)) // Build the Mappers, one per shard. mappers := []Mapper{} for _, shardID := range shardIDs { sh := shards[shardID] m, err := q.ShardMapper.CreateMapper(sh, stmt, chunkSize) if err != nil { return nil, err } if m == nil { // No data for this shard, skip it. continue } mappers = append(mappers, m) } // Certain operations on the SELECT statement can be performed by the AggregateExecutor without // assistance from the Mappers. This allows the AggregateExecutor to prepare aggregation functions // and mathematical functions. 
stmt.RewriteDistinct() if (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative() { return NewRawExecutor(stmt, mappers, chunkSize), nil } else { return NewAggregateExecutor(stmt, mappers), nil } }
// CreateMappers will create a set of mappers that need to be run to execute the map phase of a MapReduceJob. func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) { // Parse the source segments. database, policyName, measurement, err := splitIdent(stmt.Source.(*influxql.Measurement).Name) if err != nil { return nil, err } // Find database and retention policy. db := tx.server.databases[database] if db == nil { return nil, ErrDatabaseNotFound } rp := db.policies[policyName] if rp == nil { return nil, ErrRetentionPolicyNotFound } // Find measurement. m, err := tx.server.measurement(database, measurement) if err != nil { return nil, err } if m == nil { return nil, ErrMeasurementNotFound } tx.measurement = m tx.decoder = NewFieldCodec(m) // Validate the fields and tags asked for exist and keep track of which are in the select vs the where var selectFields []*Field var whereFields []*Field var selectTags []string for _, n := range stmt.NamesInSelect() { f := m.FieldByName(n) if f != nil { selectFields = append(selectFields, f) continue } if !m.HasTagKey(n) { return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n) } selectTags = append(selectTags, n) } for _, n := range stmt.NamesInWhere() { if n == "time" { continue } f := m.FieldByName(n) if f != nil { whereFields = append(whereFields, f) continue } if !m.HasTagKey(n) { return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n) } } // Grab time range from statement. tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = tx.now } if tmin.IsZero() { tmin = time.Unix(0, 0) } // Find shard groups within time range. var shardGroups []*ShardGroup for _, group := range rp.shardGroups { if group.Contains(tmin, tmax) { shardGroups = append(shardGroups, group) } } if len(shardGroups) == 0 { return nil, nil } // get the sorted unique tag sets for this query. 
tagSets := m.tagSets(stmt, tagKeys) jobs := make([]*influxql.MapReduceJob, 0, len(tagSets)) for _, t := range tagSets { // make a job for each tagset job := &influxql.MapReduceJob{ MeasurementName: m.Name, TagSet: t, TMin: tmin.UnixNano(), TMax: tmax.UnixNano(), } // make a mapper for each shard that must be hit. We may need to hit multiple shards within a shard group mappers := make([]influxql.Mapper, 0) // create mappers for each shard we need to hit for _, sg := range shardGroups { if len(sg.Shards) != 1 { // we'll only have more than 1 shard in a group when RF < # servers in cluster // TODO: implement distributed queries. panic("distributed queries not implemented yet and there are too many shards in this group") } shard := sg.Shards[0] mapper := &LocalMapper{ seriesIDs: t.SeriesIDs, db: shard.store, job: job, decoder: NewFieldCodec(m), filters: t.Filters, whereFields: whereFields, selectFields: selectFields, selectTags: selectTags, } mappers = append(mappers, mapper) } job.Mappers = mappers jobs = append(jobs, job) } // always return them in sorted order so the results from running the jobs are returned in a deterministic order sort.Sort(influxql.MapReduceJobs(jobs)) return jobs, nil }