// Plan creates an execution plan for the given SelectStatement and returns an Executor. func (q *QueryExecutor) plan(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) { shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers. // Replace instances of "now()" with the current time, and check the resultant times. stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: time.Now().UTC()}) tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = time.Now() } if tmin.IsZero() { tmin = time.Unix(0, 0) } for _, src := range stmt.Sources { mm, ok := src.(*influxql.Measurement) if !ok { return nil, fmt.Errorf("invalid source type: %#v", src) } // Build the set of target shards. Using shard IDs as keys ensures each shard ID // occurs only once. shardGroups, err := q.MetaStore.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax) if err != nil { return nil, err } for _, g := range shardGroups { for _, sh := range g.Shards { shards[sh.ID] = sh } } } // Build the Mappers, one per shard. mappers := []Mapper{} for _, sh := range shards { m, err := q.ShardMapper.CreateMapper(sh, stmt.String(), chunkSize) if err != nil { return nil, err } if m == nil { // No data for this shard, skip it. continue } mappers = append(mappers, m) } var executor Executor if len(mappers) > 0 { // All Mapper are of same type, so check first to determine correct Executor type. if _, ok := mappers[0].(*RawMapper); ok { executor = NewRawExecutor(stmt, mappers, chunkSize) } else { executor = NewAggregateExecutor(stmt, mappers) } } else { // With no mappers, the Executor type doesn't matter. executor = NewRawExecutor(stmt, nil, chunkSize) } return executor, nil }
// MustTimeRangeAndInterval returns the time range & interval of the query. // Set max to 2000-01-01 if zero. Panic on error. func MustTimeRangeAndInterval(stmt *influxql.SelectStatement, defaultMax string) (time.Time, time.Time, time.Duration) { min, max := influxql.TimeRange(stmt.Condition) interval, _, err := stmt.Dimensions.Normalize() if err != nil { panic(err.Error()) } if max.IsZero() { max = mustParseTime(defaultMax) } return min, max, interval }
// Ensure the time range of an expression can be extracted. func TestTimeRange(t *testing.T) { for i, tt := range []struct { expr string min, max string }{ // LHS VarRef {expr: `time > '2000-01-01 00:00:00'`, min: `2000-01-01T00:00:00.000000001Z`, max: `0001-01-01T00:00:00Z`}, {expr: `time >= '2000-01-01 00:00:00'`, min: `2000-01-01T00:00:00Z`, max: `0001-01-01T00:00:00Z`}, {expr: `time < '2000-01-01 00:00:00'`, min: `0001-01-01T00:00:00Z`, max: `1999-12-31T23:59:59.999999999Z`}, {expr: `time <= '2000-01-01 00:00:00'`, min: `0001-01-01T00:00:00Z`, max: `2000-01-01T00:00:00Z`}, // RHS VarRef {expr: `'2000-01-01 00:00:00' > time`, min: `0001-01-01T00:00:00Z`, max: `1999-12-31T23:59:59.999999999Z`}, {expr: `'2000-01-01 00:00:00' >= time`, min: `0001-01-01T00:00:00Z`, max: `2000-01-01T00:00:00Z`}, {expr: `'2000-01-01 00:00:00' < time`, min: `2000-01-01T00:00:00.000000001Z`, max: `0001-01-01T00:00:00Z`}, {expr: `'2000-01-01 00:00:00' <= time`, min: `2000-01-01T00:00:00Z`, max: `0001-01-01T00:00:00Z`}, // number literal {expr: `time < 10`, min: `0001-01-01T00:00:00Z`, max: `1970-01-01T00:00:00.000000009Z`}, // Equality {expr: `time = '2000-01-01 00:00:00'`, min: `2000-01-01T00:00:00Z`, max: `2000-01-01T00:00:00Z`}, // Multiple time expressions. {expr: `time >= '2000-01-01 00:00:00' AND time < '2000-01-02 00:00:00'`, min: `2000-01-01T00:00:00Z`, max: `2000-01-01T23:59:59.999999999Z`}, // Min/max crossover {expr: `time >= '2000-01-01 00:00:00' AND time <= '1999-01-01 00:00:00'`, min: `2000-01-01T00:00:00Z`, max: `1999-01-01T00:00:00Z`}, // Absolute time {expr: `time = 1388534400s`, min: `2014-01-01T00:00:00Z`, max: `2014-01-01T00:00:00Z`}, // Non-comparative expressions. {expr: `time`, min: `0001-01-01T00:00:00Z`, max: `0001-01-01T00:00:00Z`}, {expr: `time + 2`, min: `0001-01-01T00:00:00Z`, max: `0001-01-01T00:00:00Z`}, {expr: `time - '2000-01-01 00:00:00'`, min: `0001-01-01T00:00:00Z`, max: `0001-01-01T00:00:00Z`}, {expr: `time AND '2000-01-01 00:00:00'`, min: `0001-01-01T00:00:00Z`, max: `0001-01-01T00:00:00Z`}, } { // Extract time range. expr := MustParseExpr(tt.expr) min, max := influxql.TimeRange(expr) // Compare with expected min/max. if min := min.Format(time.RFC3339Nano); tt.min != min { t.Errorf("%d. %s: unexpected min:\n\nexp=%s\n\ngot=%s\n\n", i, tt.expr, tt.min, min) continue } if max := max.Format(time.RFC3339Nano); tt.max != max { t.Errorf("%d. %s: unexpected max:\n\nexp=%s\n\ngot=%s\n\n", i, tt.expr, tt.max, max) continue } } }
// Plan creates an execution plan for the given SelectStatement and returns an Executor. func (q *QueryExecutor) PlanSelect(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) { shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers. // It is important to "stamp" this time so that everywhere we evaluate `now()` in the statement is EXACTLY the same `now` now := time.Now().UTC() // Replace instances of "now()" with the current time, and check the resultant times. stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: now}) tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = now } if tmin.IsZero() { tmin = time.Unix(0, 0) } for _, src := range stmt.Sources { mm, ok := src.(*influxql.Measurement) if !ok { return nil, fmt.Errorf("invalid source type: %#v", src) } // Build the set of target shards. Using shard IDs as keys ensures each shard ID // occurs only once. shardGroups, err := q.MetaStore.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax) if err != nil { return nil, err } for _, g := range shardGroups { for _, sh := range g.Shards { shards[sh.ID] = sh } } } // Build the Mappers, one per shard. mappers := []Mapper{} for _, sh := range shards { m, err := q.ShardMapper.CreateMapper(sh, stmt, chunkSize) if err != nil { return nil, err } if m == nil { // No data for this shard, skip it. continue } mappers = append(mappers, m) } executor := NewSelectExecutor(stmt, mappers, chunkSize) return executor, nil }
// CreateMappers will create a set of mappers that need to be run to execute the map phase of a MapReduceJob. func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) { jobs := []*influxql.MapReduceJob{} for _, src := range stmt.Sources { mm, ok := src.(*influxql.Measurement) if !ok { return nil, fmt.Errorf("invalid source type: %#v", src) } // get the index and the retention policy rp, err := tx.meta.RetentionPolicy(mm.Database, mm.RetentionPolicy) if err != nil { return nil, err } m := tx.store.Measurement(mm.Database, mm.Name) if m == nil { return nil, ErrMeasurementNotFound(influxql.QuoteIdent([]string{mm.Database, "", mm.Name}...)) } tx.measurement = m // Validate the fields and tags asked for exist and keep track of which are in the select vs the where var selectFields []string var whereFields []string var selectTags []string for _, n := range stmt.NamesInSelect() { if m.HasField(n) { selectFields = append(selectFields, n) continue } if !m.HasTagKey(n) { return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n) } selectTags = append(selectTags, n) tagKeys = append(tagKeys, n) } for _, n := range stmt.NamesInWhere() { if n == "time" { continue } if m.HasField(n) { whereFields = append(whereFields, n) continue } if !m.HasTagKey(n) { return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n) } } if len(selectFields) == 0 && len(stmt.FunctionCalls()) == 0 { return nil, fmt.Errorf("select statement must include at least one field or function call") } // Validate that group by is not a field for _, d := range stmt.Dimensions { switch e := d.Expr.(type) { case *influxql.VarRef: if !m.HasTagKey(e.Val) { return nil, fmt.Errorf("can not use field in group by clause: %s", e.Val) } } } // Grab time range from statement. tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = tx.now } if tmin.IsZero() { tmin = time.Unix(0, 0) } // Find shard groups within time range. var shardGroups []*meta.ShardGroupInfo for _, group := range rp.ShardGroups { if group.Overlaps(tmin, tmax) { g := group shardGroups = append(shardGroups, &g) } } if len(shardGroups) == 0 { return nil, nil } // get the group by interval, if there is one var interval int64 if d, err := stmt.GroupByInterval(); err != nil { return nil, err } else { interval = d.Nanoseconds() } // get the sorted unique tag sets for this query. tagSets, err := m.TagSets(stmt, tagKeys) if err != nil { return nil, err } for _, t := range tagSets { // make a job for each tagset job := &influxql.MapReduceJob{ MeasurementName: m.Name, TagSet: t, TMin: tmin.UnixNano(), TMax: tmax.UnixNano(), } // make a mapper for each shard that must be hit. We may need to hit multiple shards within a shard group var mappers []influxql.Mapper // create mappers for each shard we need to hit for _, sg := range shardGroups { // TODO: implement distributed queries if len(sg.Shards) != 1 { return nil, fmt.Errorf("distributed queries aren't supported yet. You have a replication policy with RF < # of servers in cluster") } shard := tx.store.Shard(sg.Shards[0].ID) if shard == nil { // the store returned nil which means we haven't written any data into this shard yet, so ignore it continue } // get the codec for this measuremnt. If this is nil it just means this measurement was // never written into this shard, so we can skip it and continue. codec := shard.FieldCodec(m.Name) if codec == nil { continue } var mapper influxql.Mapper mapper = &LocalMapper{ seriesKeys: t.SeriesKeys, shard: shard, db: shard.DB(), job: job, decoder: codec, filters: t.Filters, whereFields: whereFields, selectFields: selectFields, selectTags: selectTags, tmin: tmin.UnixNano(), tmax: tmax.UnixNano(), interval: interval, // multiple mappers may need to be merged together to get the results // for a raw query. So each mapper will have to read at least the // limit plus the offset in data points to ensure we've hit our mark limit: uint64(stmt.Limit) + uint64(stmt.Offset), } mappers = append(mappers, mapper) } job.Mappers = mappers jobs = append(jobs, job) } } // always return them in sorted order so the results from running the jobs are returned in a deterministic order sort.Sort(influxql.MapReduceJobs(jobs)) return jobs, nil }
// Plan creates an execution plan for the given SelectStatement and returns an Executor. func (q *QueryExecutor) PlanSelect(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) { var shardIDs []uint64 shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers. // It is important to "stamp" this time so that everywhere we evaluate `now()` in the statement is EXACTLY the same `now` now := time.Now().UTC() // Replace instances of "now()" with the current time, and check the resultant times. stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: now}) tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = now } if tmin.IsZero() { tmin = time.Unix(0, 0) } for _, src := range stmt.Sources { mm, ok := src.(*influxql.Measurement) if !ok { return nil, fmt.Errorf("invalid source type: %#v", src) } // Build the set of target shards. Using shard IDs as keys ensures each shard ID // occurs only once. shardGroups, err := q.MetaClient.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax) if err != nil { return nil, err } for _, g := range shardGroups { for _, sh := range g.Shards { if _, ok := shards[sh.ID]; !ok { shards[sh.ID] = sh shardIDs = append(shardIDs, sh.ID) } } } } // Sort shard IDs to make testing deterministic. sort.Sort(uint64Slice(shardIDs)) // Build the Mappers, one per shard. mappers := []Mapper{} for _, shardID := range shardIDs { sh := shards[shardID] m, err := q.ShardMapper.CreateMapper(sh, stmt, chunkSize) if err != nil { return nil, err } if m == nil { // No data for this shard, skip it. continue } mappers = append(mappers, m) } // Certain operations on the SELECT statement can be performed by the AggregateExecutor without // assistance from the Mappers. This allows the AggregateExecutor to prepare aggregation functions // and mathematical functions. stmt.RewriteDistinct() if (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative() { return NewRawExecutor(stmt, mappers, chunkSize), nil } else { return NewAggregateExecutor(stmt, mappers), nil } }
// Ensure the SELECT statement can have its start and end time set func TestSelectStatement_SetTimeRange(t *testing.T) { q := "SELECT sum(value) from foo where time < now() GROUP BY time(10m)" stmt, err := influxql.NewParser(strings.NewReader(q)).ParseStatement() if err != nil { t.Fatalf("invalid statement: %q: %s", stmt, err) } s := stmt.(*influxql.SelectStatement) min, max := influxql.TimeRange(s.Condition) start := time.Now().Add(-20 * time.Hour).Round(time.Second).UTC() end := time.Now().Add(10 * time.Hour).Round(time.Second).UTC() s.SetTimeRange(start, end) min, max = influxql.TimeRange(s.Condition) if min != start { t.Fatalf("start time wasn't set properly.\n exp: %s\n got: %s", start, min) } // the end range is actually one nanosecond before the given one since end is exclusive end = end.Add(-time.Nanosecond) if max != end { t.Fatalf("end time wasn't set properly.\n exp: %s\n got: %s", end, max) } // ensure we can set a time on a select that already has one set start = time.Now().Add(-20 * time.Hour).Round(time.Second).UTC() end = time.Now().Add(10 * time.Hour).Round(time.Second).UTC() q = fmt.Sprintf("SELECT sum(value) from foo WHERE time >= %ds and time <= %ds GROUP BY time(10m)", start.Unix(), end.Unix()) stmt, err = influxql.NewParser(strings.NewReader(q)).ParseStatement() if err != nil { t.Fatalf("invalid statement: %q: %s", stmt, err) } s = stmt.(*influxql.SelectStatement) min, max = influxql.TimeRange(s.Condition) if start != min || end != max { t.Fatalf("start and end times weren't equal:\n exp: %s\n got: %s\n exp: %s\n got:%s\n", start, min, end, max) } // update and ensure it saves it start = time.Now().Add(-40 * time.Hour).Round(time.Second).UTC() end = time.Now().Add(20 * time.Hour).Round(time.Second).UTC() s.SetTimeRange(start, end) min, max = influxql.TimeRange(s.Condition) // TODO: right now the SetTimeRange can't override the start time if it's more recent than what they're trying to set it to. // shouldn't matter for our purposes with continuous queries, but fix this later if min != start { t.Fatalf("start time wasn't set properly.\n exp: %s\n got: %s", start, min) } // the end range is actually one nanosecond before the given one since end is exclusive end = end.Add(-time.Nanosecond) if max != end { t.Fatalf("end time wasn't set properly.\n exp: %s\n got: %s", end, max) } // ensure that when we set a time range other where clause conditions are still there q = "SELECT sum(value) from foo WHERE foo = 'bar' and time < now() GROUP BY time(10m)" stmt, err = influxql.NewParser(strings.NewReader(q)).ParseStatement() if err != nil { t.Fatalf("invalid statement: %q: %s", stmt, err) } s = stmt.(*influxql.SelectStatement) // update and ensure it saves it start = time.Now().Add(-40 * time.Hour).Round(time.Second).UTC() end = time.Now().Add(20 * time.Hour).Round(time.Second).UTC() s.SetTimeRange(start, end) min, max = influxql.TimeRange(s.Condition) if min != start { t.Fatalf("start time wasn't set properly.\n exp: %s\n got: %s", start, min) } // the end range is actually one nanosecond before the given one since end is exclusive end = end.Add(-time.Nanosecond) if max != end { t.Fatalf("end time wasn't set properly.\n exp: %s\n got: %s", end, max) } // ensure the where clause is there hasWhere := false influxql.WalkFunc(s.Condition, func(n influxql.Node) { if ex, ok := n.(*influxql.BinaryExpr); ok { if lhs, ok := ex.LHS.(*influxql.VarRef); ok { if lhs.Val == "foo" { if rhs, ok := ex.RHS.(*influxql.StringLiteral); ok { if rhs.Val == "bar" { hasWhere = true } } } } } }) if !hasWhere { t.Fatal("set time range cleared out the where clause") } }
// CreateMappers will create a set of mappers that need to be run to execute the map phase of a MapReduceJob. func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) { // Parse the source segments. database, policyName, measurement, err := splitIdent(stmt.Source.(*influxql.Measurement).Name) if err != nil { return nil, err } // Find database and retention policy. db := tx.server.databases[database] if db == nil { return nil, ErrDatabaseNotFound } rp := db.policies[policyName] if rp == nil { return nil, ErrRetentionPolicyNotFound } // Find measurement. m, err := tx.server.measurement(database, measurement) if err != nil { return nil, err } if m == nil { return nil, ErrMeasurementNotFound } tx.measurement = m tx.decoder = NewFieldCodec(m) // Validate the fields and tags asked for exist and keep track of which are in the select vs the where var selectFields []*Field var whereFields []*Field var selectTags []string for _, n := range stmt.NamesInSelect() { f := m.FieldByName(n) if f != nil { selectFields = append(selectFields, f) continue } if !m.HasTagKey(n) { return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n) } selectTags = append(selectTags, n) } for _, n := range stmt.NamesInWhere() { if n == "time" { continue } f := m.FieldByName(n) if f != nil { whereFields = append(whereFields, f) continue } if !m.HasTagKey(n) { return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n) } } // Grab time range from statement. tmin, tmax := influxql.TimeRange(stmt.Condition) if tmax.IsZero() { tmax = tx.now } if tmin.IsZero() { tmin = time.Unix(0, 0) } // Find shard groups within time range. var shardGroups []*ShardGroup for _, group := range rp.shardGroups { if group.Contains(tmin, tmax) { shardGroups = append(shardGroups, group) } } if len(shardGroups) == 0 { return nil, nil } // get the sorted unique tag sets for this query. tagSets := m.tagSets(stmt, tagKeys) jobs := make([]*influxql.MapReduceJob, 0, len(tagSets)) for _, t := range tagSets { // make a job for each tagset job := &influxql.MapReduceJob{ MeasurementName: m.Name, TagSet: t, TMin: tmin.UnixNano(), TMax: tmax.UnixNano(), } // make a mapper for each shard that must be hit. We may need to hit multiple shards within a shard group mappers := make([]influxql.Mapper, 0) // create mappers for each shard we need to hit for _, sg := range shardGroups { if len(sg.Shards) != 1 { // we'll only have more than 1 shard in a group when RF < # servers in cluster // TODO: implement distributed queries. panic("distributed queries not implemented yet and there are too many shards in this group") } shard := sg.Shards[0] mapper := &LocalMapper{ seriesIDs: t.SeriesIDs, db: shard.store, job: job, decoder: NewFieldCodec(m), filters: t.Filters, whereFields: whereFields, selectFields: selectFields, selectTags: selectTags, } mappers = append(mappers, mapper) } job.Mappers = mappers jobs = append(jobs, job) } // always return them in sorted order so the results from running the jobs are returned in a deterministic order sort.Sort(influxql.MapReduceJobs(jobs)) return jobs, nil }
// CreateIterators returns an iterator for a simple select statement. func (tx *tx) CreateIterators(stmt *influxql.SelectStatement) ([]influxql.Iterator, error) { // Parse the source segments. database, policyName, measurement, err := splitIdent(stmt.Source.(*influxql.Measurement).Name) if err != nil { return nil, err } // Grab time range from statement. tmin, tmax := influxql.TimeRange(stmt.Condition) if tmin.IsZero() { tmin = time.Unix(0, 1) } if tmax.IsZero() { tmax = tx.now } // Find database and retention policy. db := tx.server.databases[database] if db == nil { return nil, ErrDatabaseNotFound } rp := db.policies[policyName] if rp == nil { return nil, ErrRetentionPolicyNotFound } // Find shard groups within time range. var shardGroups []*ShardGroup for _, group := range rp.shardGroups { if timeBetweenInclusive(group.StartTime, tmin, tmax) || timeBetweenInclusive(group.EndTime, tmin, tmax) { shardGroups = append(shardGroups, group) } } if len(shardGroups) == 0 { return nil, nil } // Normalize dimensions to extract the interval. _, dimensions, err := stmt.Dimensions.Normalize() if err != nil { return nil, err } // Find measurement. m, err := tx.server.measurement(database, measurement) if err != nil { return nil, err } if m == nil { return nil, ErrMeasurementNotFound } // Find field. fieldName := stmt.Fields[0].Expr.(*influxql.VarRef).Val f := m.FieldByName(fieldName) if f == nil { return nil, fmt.Errorf("field not found: %s", fieldName) } tagSets := m.tagSets(stmt, dimensions) // Create an iterator for every shard. var itrs []influxql.Iterator for tag, set := range tagSets { for _, group := range shardGroups { // TODO: only create iterators for the shards we actually have to hit in a group for _, sh := range group.Shards { // create a series cursor for each unique series id cursors := make([]*seriesCursor, 0, len(set)) for id, cond := range set { cursors = append(cursors, &seriesCursor{id: id, condition: cond}) } // create the shard iterator that will map over all series for the shard itr := &shardIterator{ fieldName: f.Name, fieldID: f.ID, tags: tag, db: sh.store, cursors: cursors, tmin: tmin.UnixNano(), tmax: tmax.UnixNano(), } // Add to tx so the bolt transaction can be opened/closed. tx.itrs = append(tx.itrs, itr) itrs = append(itrs, itr) } } } return itrs, nil }