// WhereFields returns a list of non-"time" fields in the WHERE section of stmt.
func (m *Measurement) WhereFields(stmt *influxql.SelectStatement) []string {
	set := newStringSet()
	for _, name := range stmt.NamesInWhere() {
		if name != "time" && m.HasField(name) {
			set.add(name)
		}
	}
	return set.list()
}
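// Example (sketch): one way WhereFields might be exercised. The statement text, the
// exampleWhereFields name, and the surrounding setup are illustrative assumptions, not
// part of the original code. Given a measurement where "value" is a field and "host" is
// a tag, only the non-"time" field names in the WHERE clause are returned.
func exampleWhereFields(m *Measurement) ([]string, error) {
	stmt, err := influxql.ParseStatement(`SELECT value FROM cpu WHERE host = 'serverA' AND value > 10 AND time > now() - 1h`)
	if err != nil {
		return nil, err
	}
	sel, ok := stmt.(*influxql.SelectStatement)
	if !ok {
		return nil, fmt.Errorf("expected a SELECT statement")
	}
	// Expected result here: []string{"value"} — "host" is a tag and "time" is always excluded.
	return m.WhereFields(sel), nil
}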
// createTagSetsAndFields returns the tag sets and various fields given a measurement and
// SELECT statement.
func createTagSetsAndFields(m *Measurement, stmt *influxql.SelectStatement) (*tagSetsAndFields, error) {
	_, tagKeys, err := stmt.Dimensions.Normalize()
	if err != nil {
		return nil, err
	}

	sfs := newStringSet()
	sts := newStringSet()
	wfs := newStringSet()

	// Validate that the requested fields and tags exist, and track which appear in the
	// SELECT clause vs. the WHERE clause.
	for _, n := range stmt.NamesInSelect() {
		if m.HasField(n) {
			sfs.add(n)
			continue
		}
		if m.HasTagKey(n) {
			sts.add(n)
		}
	}
	for _, n := range stmt.NamesInDimension() {
		if m.HasTagKey(n) {
			tagKeys = append(tagKeys, n)
		}
	}
	for _, n := range stmt.NamesInWhere() {
		if n == "time" {
			continue
		}
		if m.HasField(n) {
			wfs.add(n)
			continue
		}
	}

	// Get the sorted unique tag sets for this statement.
	tagSets, err := m.TagSets(stmt, tagKeys)
	if err != nil {
		return nil, err
	}

	return &tagSetsAndFields{
		tagSets:      tagSets,
		selectFields: sfs.list(),
		selectTags:   sts.list(),
		whereFields:  wfs.list(),
	}, nil
}
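// Sketch: the tagSetsAndFields struct is referenced above but not defined in this excerpt.
// The shape below is inferred from how createTagSetsAndFields populates it and from the
// return type of Measurement.TagSets; treat it as an illustration (note the hypothetical
// name), not the original definition.
type tagSetsAndFieldsSketch struct {
	tagSets      []*influxql.TagSet // sorted, unique tag sets for the statement
	selectFields []string           // field names referenced in the SELECT clause
	selectTags   []string           // tag keys referenced in the SELECT clause
	whereFields  []string           // non-"time" field names referenced in the WHERE clause
}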
// CreateMapReduceJobs will create a set of mappers that need to be run to execute the map phase of a MapReduceJob.
func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) {
	jobs := []*influxql.MapReduceJob{}
	for _, src := range stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return nil, fmt.Errorf("invalid source type: %#v", src)
		}

		// Get the index and the retention policy.
		rp, err := tx.meta.RetentionPolicy(mm.Database, mm.RetentionPolicy)
		if err != nil {
			return nil, err
		}
		m := tx.store.Measurement(mm.Database, mm.Name)
		if m == nil {
			return nil, ErrMeasurementNotFound(influxql.QuoteIdent([]string{mm.Database, "", mm.Name}...))
		}

		tx.measurement = m

		// Validate that the requested fields and tags exist, and track which appear in the
		// SELECT clause vs. the WHERE clause.
		var selectFields []string
		var whereFields []string
		var selectTags []string

		for _, n := range stmt.NamesInSelect() {
			if m.HasField(n) {
				selectFields = append(selectFields, n)
				continue
			}
			if !m.HasTagKey(n) {
				return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
			}
			selectTags = append(selectTags, n)
			tagKeys = append(tagKeys, n)
		}
		for _, n := range stmt.NamesInWhere() {
			if n == "time" {
				continue
			}
			if m.HasField(n) {
				whereFields = append(whereFields, n)
				continue
			}
			if !m.HasTagKey(n) {
				return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
			}
		}

		if len(selectFields) == 0 && len(stmt.FunctionCalls()) == 0 {
			return nil, fmt.Errorf("select statement must include at least one field or function call")
		}

		// Validate that the GROUP BY clause does not reference a field.
		for _, d := range stmt.Dimensions {
			switch e := d.Expr.(type) {
			case *influxql.VarRef:
				if !m.HasTagKey(e.Val) {
					return nil, fmt.Errorf("can not use field in group by clause: %s", e.Val)
				}
			}
		}

		// Grab the time range from the statement.
		tmin, tmax := influxql.TimeRange(stmt.Condition)
		if tmax.IsZero() {
			tmax = tx.now
		}
		if tmin.IsZero() {
			tmin = time.Unix(0, 0)
		}

		// Find shard groups within the time range.
		var shardGroups []*meta.ShardGroupInfo
		for _, group := range rp.ShardGroups {
			if group.Overlaps(tmin, tmax) {
				g := group
				shardGroups = append(shardGroups, &g)
			}
		}
		if len(shardGroups) == 0 {
			return nil, nil
		}

		// Get the GROUP BY interval, if there is one.
		var interval int64
		if d, err := stmt.GroupByInterval(); err != nil {
			return nil, err
		} else {
			interval = d.Nanoseconds()
		}

		// Get the sorted unique tag sets for this query.
		tagSets, err := m.TagSets(stmt, tagKeys)
		if err != nil {
			return nil, err
		}

		for _, t := range tagSets {
			// Make a job for each tag set.
			job := &influxql.MapReduceJob{
				MeasurementName: m.Name,
				TagSet:          t,
				TMin:            tmin.UnixNano(),
				TMax:            tmax.UnixNano(),
			}

			// Make a mapper for each shard that must be hit. We may need to hit multiple
			// shards within a shard group.
			var mappers []influxql.Mapper

			// Create mappers for each shard we need to hit.
			for _, sg := range shardGroups {
				// TODO: implement distributed queries
				if len(sg.Shards) != 1 {
					return nil, fmt.Errorf("distributed queries aren't supported yet. You have a replication policy with RF < # of servers in cluster")
				}

				shard := tx.store.Shard(sg.Shards[0].ID)
				if shard == nil {
					// The store returned nil, which means no data has been written into this shard yet, so ignore it.
					continue
				}

				// Get the codec for this measurement. If it is nil, the measurement was
				// never written into this shard, so we can skip it and continue.
				codec := shard.FieldCodec(m.Name)
				if codec == nil {
					continue
				}

				var mapper influxql.Mapper

				mapper = &LocalMapper{
					seriesKeys:   t.SeriesKeys,
					shard:        shard,
					db:           shard.DB(),
					job:          job,
					decoder:      codec,
					filters:      t.Filters,
					whereFields:  whereFields,
					selectFields: selectFields,
					selectTags:   selectTags,
					tmin:         tmin.UnixNano(),
					tmax:         tmax.UnixNano(),
					interval:     interval,
					// Multiple mappers may need to be merged together to get the results
					// for a raw query, so each mapper must read at least limit + offset
					// data points to ensure we hit our mark.
					limit: uint64(stmt.Limit) + uint64(stmt.Offset),
				}

				mappers = append(mappers, mapper)
			}

			job.Mappers = mappers

			jobs = append(jobs, job)
		}
	}

	// Always return the jobs in sorted order so the results from running them are returned in a deterministic order.
	sort.Sort(influxql.MapReduceJobs(jobs))

	return jobs, nil
}
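// Example (sketch): how a caller might build and inspect the jobs produced above. The
// exampleInspectJobs name and the Printf inspection are assumptions for illustration; the
// real query engine hands the jobs to its executor rather than printing them. Only fields
// that appear in the excerpt (MeasurementName, TMin, TMax, Mappers) are used.
func exampleInspectJobs(tx *tx, stmt *influxql.SelectStatement) error {
	// Dimensions.Normalize returns the GROUP BY interval (ignored here) and the tag keys,
	// mirroring how createTagSetsAndFields derives them.
	_, tagKeys, err := stmt.Dimensions.Normalize()
	if err != nil {
		return err
	}

	jobs, err := tx.CreateMapReduceJobs(stmt, tagKeys)
	if err != nil {
		return err
	}

	for _, job := range jobs {
		// Each job covers a single tag set over [TMin, TMax]; one mapper was created per
		// shard group that overlaps the statement's time range and actually holds data.
		fmt.Printf("measurement=%s mappers=%d window=[%d,%d]\n",
			job.MeasurementName, len(job.Mappers), job.TMin, job.TMax)
	}
	return nil
}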
// CreateMapReduceJobs will create a set of mappers that need to be run to execute the map phase of a MapReduceJob.
func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) {
	// Parse the source segments.
	database, policyName, measurement, err := splitIdent(stmt.Source.(*influxql.Measurement).Name)
	if err != nil {
		return nil, err
	}

	// Find the database and retention policy.
	db := tx.server.databases[database]
	if db == nil {
		return nil, ErrDatabaseNotFound
	}
	rp := db.policies[policyName]
	if rp == nil {
		return nil, ErrRetentionPolicyNotFound
	}

	// Find the measurement.
	m, err := tx.server.measurement(database, measurement)
	if err != nil {
		return nil, err
	}
	if m == nil {
		return nil, ErrMeasurementNotFound
	}

	tx.measurement = m
	tx.decoder = NewFieldCodec(m)

	// Validate that the requested fields and tags exist, and track which appear in the
	// SELECT clause vs. the WHERE clause.
	var selectFields []*Field
	var whereFields []*Field
	var selectTags []string

	for _, n := range stmt.NamesInSelect() {
		f := m.FieldByName(n)
		if f != nil {
			selectFields = append(selectFields, f)
			continue
		}
		if !m.HasTagKey(n) {
			return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
		}
		selectTags = append(selectTags, n)
	}
	for _, n := range stmt.NamesInWhere() {
		if n == "time" {
			continue
		}
		f := m.FieldByName(n)
		if f != nil {
			whereFields = append(whereFields, f)
			continue
		}
		if !m.HasTagKey(n) {
			return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
		}
	}

	// Grab the time range from the statement.
	tmin, tmax := influxql.TimeRange(stmt.Condition)
	if tmax.IsZero() {
		tmax = tx.now
	}
	if tmin.IsZero() {
		tmin = time.Unix(0, 0)
	}

	// Find shard groups within the time range.
	var shardGroups []*ShardGroup
	for _, group := range rp.shardGroups {
		if group.Contains(tmin, tmax) {
			shardGroups = append(shardGroups, group)
		}
	}
	if len(shardGroups) == 0 {
		return nil, nil
	}

	// Get the sorted unique tag sets for this query.
	tagSets := m.tagSets(stmt, tagKeys)

	jobs := make([]*influxql.MapReduceJob, 0, len(tagSets))
	for _, t := range tagSets {
		// Make a job for each tag set.
		job := &influxql.MapReduceJob{
			MeasurementName: m.Name,
			TagSet:          t,
			TMin:            tmin.UnixNano(),
			TMax:            tmax.UnixNano(),
		}

		// Make a mapper for each shard that must be hit. We may need to hit multiple
		// shards within a shard group.
		mappers := make([]influxql.Mapper, 0)

		// Create mappers for each shard we need to hit.
		for _, sg := range shardGroups {
			if len(sg.Shards) != 1 {
				// We'll only have more than one shard in a group when RF < # of servers in the cluster.
				// TODO: implement distributed queries.
				panic("distributed queries not implemented yet and there are too many shards in this group")
			}

			shard := sg.Shards[0]
			mapper := &LocalMapper{
				seriesIDs:    t.SeriesIDs,
				db:           shard.store,
				job:          job,
				decoder:      NewFieldCodec(m),
				filters:      t.Filters,
				whereFields:  whereFields,
				selectFields: selectFields,
				selectTags:   selectTags,
			}

			mappers = append(mappers, mapper)
		}

		job.Mappers = mappers

		jobs = append(jobs, job)
	}

	// Always return the jobs in sorted order so the results from running them are returned in a deterministic order.
	sort.Sort(influxql.MapReduceJobs(jobs))

	return jobs, nil
}
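// Sketch: splitIdent is called above but not defined in this excerpt. Its call site implies
// that it splits a fully qualified source name into database, retention policy, and measurement.
// The function below (note the hypothetical name) is an illustration only, assuming a simple
// dot-separated identifier with optional double quotes, e.g. `"mydb"."default"."cpu"`; the
// original helper's quoting and escaping rules may differ. Requires the standard library
// "strings" package.
func splitIdentSketch(s string) (database, policy, measurement string, err error) {
	parts := strings.Split(s, ".")
	if len(parts) != 3 {
		return "", "", "", fmt.Errorf("invalid identifier: %q", s)
	}
	unquote := func(p string) string { return strings.Trim(p, `"`) }
	return unquote(parts[0]), unquote(parts[1]), unquote(parts[2]), nil
}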