Esempio n. 1
0
// filters walks the where clause of a select statement and returns a map with all series ids
// matching the where clause and any filter expression that should be applied to each
func (m *Measurement) filters(stmt *influxql.SelectStatement) (map[uint64]influxql.Expr, error) {

	if stmt.Condition == nil || stmt.OnlyTimeDimensions() {
		seriesIdsToExpr := make(map[uint64]influxql.Expr)
		for _, id := range m.seriesIDs {
			seriesIdsToExpr[id] = nil
		}
		return seriesIdsToExpr, nil
	}

	ids, seriesIdsToExpr, err := m.walkWhereForSeriesIds(stmt.Condition)
	if err != nil {
		return nil, err
	}
	// Ensure every id is in the map and replace literal true expressions with
	// nil so the engine doesn't waste time evaluating them.
	for _, id := range ids {
		if expr, ok := seriesIdsToExpr[id]; !ok {
			seriesIdsToExpr[id] = nil
		} else if b, ok := expr.(*influxql.BooleanLiteral); ok && b.Val {
			seriesIdsToExpr[id] = nil
		}
	}
	return seriesIdsToExpr, nil
}
Esempio n. 2
0
// Plan creates an execution plan for the given SelectStatement and returns an Executor.
func (q *QueryExecutor) plan(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) {
	shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers.

	// Replace instances of "now()" with the current time, and check the resultant times.
	stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: time.Now().UTC()})
	tmin, tmax := influxql.TimeRange(stmt.Condition)
	if tmax.IsZero() {
		tmax = time.Now()
	}
	if tmin.IsZero() {
		tmin = time.Unix(0, 0)
	}

	for _, src := range stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return nil, fmt.Errorf("invalid source type: %#v", src)
		}

		// Build the set of target shards. Using shard IDs as keys ensures each shard ID
		// occurs only once.
		shardGroups, err := q.MetaStore.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax)
		if err != nil {
			return nil, err
		}
		for _, g := range shardGroups {
			for _, sh := range g.Shards {
				shards[sh.ID] = sh
			}
		}
	}

	// Build the Mappers, one per shard.
	mappers := []Mapper{}
	for _, sh := range shards {
		m, err := q.ShardMapper.CreateMapper(sh, stmt.String(), chunkSize)
		if err != nil {
			return nil, err
		}
		if m == nil {
			// No data for this shard, skip it.
			continue
		}
		mappers = append(mappers, m)
	}

	var executor Executor
	if len(mappers) > 0 {
		// All Mapper are of same type, so check first to determine correct Executor type.
		if _, ok := mappers[0].(*RawMapper); ok {
			executor = NewRawExecutor(stmt, mappers, chunkSize)
		} else {
			executor = NewAggregateExecutor(stmt, mappers)
		}
	} else {
		// With no mappers, the Executor type doesn't matter.
		executor = NewRawExecutor(stmt, nil, chunkSize)
	}
	return executor, nil
}
Esempio n. 3
0
// WhereFields returns a list of non-"time" fields in the WHERE section of stmt.
func (m *Measurement) WhereFields(stmt *influxql.SelectStatement) []string {
	set := newStringSet()
	for _, name := range stmt.NamesInWhere() {
		if name != "time" && m.HasField(name) {
			set.add(name)
		}
	}
	return set.list()
}
Esempio n. 4
0
// SelectTags returns a list of non-field tags in the SELECT section of stmt.
func (m *Measurement) SelectTags(stmt *influxql.SelectStatement) []string {
	set := newStringSet()
	for _, name := range stmt.NamesInSelect() {
		if !m.HasField(name) && m.HasTagKey(name) {
			set.add(name)
		}
	}
	return set.list()
}
Esempio n. 5
0
// SelectFields returns a list of fields in the SELECT section of stmt.
func (m *Measurement) SelectFields(stmt *influxql.SelectStatement) []string {
	set := newStringSet()
	for _, name := range stmt.NamesInSelect() {
		if m.HasField(name) {
			set.add(name)
			continue
		}
	}
	return set.list()
}
Esempio n. 6
0
// NewLocalMapper returns a mapper for the given shard, which will return data for the SELECT statement.
func NewLocalMapper(shard *Shard, stmt *influxql.SelectStatement, chunkSize int) *LocalMapper {
	m := &LocalMapper{
		shard:     shard,
		stmt:      stmt,
		chunkSize: chunkSize,
		cursors:   make([]*tagSetCursor, 0),
	}

	m.rawMode = (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative()
	return m
}
Esempio n. 7
0
// expandWildcards returns a new SelectStatement with wildcards in the fields
// and/or GROUP BY exapnded with actual field names.
func (q *QueryExecutor) expandWildcards(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
	// If there are no wildcards in the statement, return it as-is.
	if !stmt.HasWildcard() {
		return stmt, nil
	}

	// Use sets to avoid duplicate field names.
	fieldSet := map[string]struct{}{}
	dimensionSet := map[string]struct{}{}

	var fields influxql.Fields
	var dimensions influxql.Dimensions

	// Iterate measurements in the FROM clause getting the fields & dimensions for each.
	for _, src := range stmt.Sources {
		if m, ok := src.(*influxql.Measurement); ok {
			// Lookup the database. The database may not exist if no data for this database
			// was ever written to the shard.
			db := q.store.DatabaseIndex(m.Database)
			if db == nil {
				return stmt, nil
			}

			// Lookup the measurement in the database.
			mm := db.measurements[m.Name]
			if mm == nil {
				return nil, ErrMeasurementNotFound(m.String())
			}

			// Get the fields for this measurement.
			for _, name := range mm.FieldNames() {
				if _, ok := fieldSet[name]; ok {
					continue
				}
				fieldSet[name] = struct{}{}
				fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: name}})
			}

			// Get the dimensions for this measurement.
			for _, t := range mm.TagKeys() {
				if _, ok := dimensionSet[t]; ok {
					continue
				}
				dimensionSet[t] = struct{}{}
				dimensions = append(dimensions, &influxql.Dimension{Expr: &influxql.VarRef{Val: t}})
			}
		}
	}

	// Return a new SelectStatement with the wild cards rewritten.
	return stmt.RewriteWildcards(fields, dimensions), nil
}
Esempio n. 8
0
func (s *Shard) ValidateAggregateFieldsInStatement(measurementName string, stmt *influxql.SelectStatement) error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	validateType := func(aname, fname string, t influxql.DataType) error {
		if t != influxql.Float && t != influxql.Integer {
			return fmt.Errorf("aggregate '%s' requires numerical field values. Field '%s' is of type %s",
				aname, fname, t)
		}
		return nil
	}

	m := s.measurementFields[measurementName]
	if m == nil {
		return fmt.Errorf("measurement not found: %s", measurementName)
	}

	// If a numerical aggregate is requested, ensure it is only performed on numeric data or on a
	// nested aggregate on numeric data.
	for _, a := range stmt.FunctionCalls() {
		// Check for fields like `derivative(mean(value), 1d)`
		var nested *influxql.Call = a
		if fn, ok := nested.Args[0].(*influxql.Call); ok {
			nested = fn
		}

		switch lit := nested.Args[0].(type) {
		case *influxql.VarRef:
			if influxql.IsNumeric(nested) {
				f := m.Fields[lit.Val]
				if err := validateType(a.Name, f.Name, f.Type); err != nil {
					return err
				}
			}
		case *influxql.Distinct:
			if nested.Name != "count" {
				return fmt.Errorf("aggregate call didn't contain a field %s", a.String())
			}
			if influxql.IsNumeric(nested) {
				f := m.Fields[lit.Val]
				if err := validateType(a.Name, f.Name, f.Type); err != nil {
					return err
				}
			}
		default:
			return fmt.Errorf("aggregate call didn't contain a field %s", a.String())
		}
	}

	return nil
}
Esempio n. 9
0
// createTagSetsAndFields returns the tagsets and various fields given a measurement and
// SELECT statement. It also ensures that the fields and tags exist.
func createTagSetsAndFields(m *Measurement, stmt *influxql.SelectStatement) (*tagSetsAndFields, error) {
	_, tagKeys, err := stmt.Dimensions.Normalize()
	if err != nil {
		return nil, err
	}

	sfs := newStringSet()
	sts := newStringSet()
	wfs := newStringSet()

	// Validate the fields and tags asked for exist and keep track of which are in the select vs the where
	for _, n := range stmt.NamesInSelect() {
		if m.HasField(n) {
			sfs.add(n)
			continue
		}
		if !m.HasTagKey(n) {
			return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
		}
		sts.add(n)
		tagKeys = append(tagKeys, n)
	}
	for _, n := range stmt.NamesInWhere() {
		if n == "time" {
			continue
		}
		if m.HasField(n) {
			wfs.add(n)
			continue
		}
		if !m.HasTagKey(n) {
			return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
		}
	}

	// Get the sorted unique tag sets for this statement.
	tagSets, err := m.TagSets(stmt, tagKeys)
	if err != nil {
		return nil, err
	}

	return &tagSetsAndFields{
		tagSets:      tagSets,
		selectFields: sfs.list(),
		selectTags:   sts.list(),
		whereFields:  wfs.list(),
	}, nil
}
Esempio n. 10
0
// rewriteSelectStatement performs any necessary query re-writing.
func (lm *SelectMapper) rewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
	var err error
	// Expand regex expressions in the FROM clause.
	sources, err := expandSources(stmt.Sources, lm.shard.index)
	if err != nil {
		return nil, err
	}
	stmt.Sources = sources
	// Expand wildcards in the fields or GROUP BY.
	stmt, err = lm.expandWildcards(stmt)
	if err != nil {
		return nil, err
	}
	stmt.RewriteDistinct()
	return stmt, nil
}
Esempio n. 11
0
// DimensionTagSets returns list of tag sets from the GROUP BY section of stmt.
func (m *Measurement) DimensionTagSets(stmt *influxql.SelectStatement) ([]*influxql.TagSet, error) {
	_, tagKeys := stmt.Dimensions.Normalize()

	for _, n := range stmt.NamesInDimension() {
		if m.HasTagKey(n) {
			tagKeys = append(tagKeys, n)
		}
	}

	// Get the sorted unique tag sets for this statement.
	tagSets, err := m.TagSets(stmt, tagKeys)
	if err != nil {
		return nil, err
	}
	return tagSets, nil
}
Esempio n. 12
0
// RewriteSelectStatement performs any necessary query re-writing.
func (db *DatabaseIndex) RewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
	// Expand regex expressions in the FROM clause.
	sources, err := db.ExpandSources(stmt.Sources)
	if err != nil {
		return nil, err
	}
	stmt.Sources = sources

	// Expand wildcards in the fields or GROUP BY.
	stmt, err = db.ExpandWildcards(stmt)
	if err != nil {
		return nil, err
	}

	stmt.RewriteDistinct()

	return stmt, nil
}
Esempio n. 13
0
// filters walks the where clause of a select statement and returns a map with all series ids
// matching the where clause and any filter expression that should be applied to each
func (m *Measurement) filters(stmt *influxql.SelectStatement) map[uint32]influxql.Expr {
	seriesIdsToExpr := make(map[uint32]influxql.Expr)
	if stmt.Condition == nil || stmt.OnlyTimeDimensions() {
		for _, id := range m.seriesIDs {
			seriesIdsToExpr[id] = nil
		}
		return seriesIdsToExpr
	}
	ids, _, _ := m.walkWhereForSeriesIds(stmt.Condition, seriesIdsToExpr)

	// ensure every id is in the map
	for _, id := range ids {
		if _, ok := seriesIdsToExpr[id]; !ok {
			seriesIdsToExpr[id] = nil
		}
	}

	return seriesIdsToExpr
}
Esempio n. 14
0
// expandWildcards returns a new SelectStatement with wildcards in the fields
// and/or GROUP BY expanded with actual field names.
func (lm *LocalMapper) expandWildcards(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
	// If there are no wildcards in the statement, return it as-is.
	if !stmt.HasWildcard() {
		return stmt, nil
	}
	// Use sets to avoid duplicate field names.
	fieldSet := map[string]struct{}{}
	dimensionSet := map[string]struct{}{}
	var fields influxql.Fields
	var dimensions influxql.Dimensions
	// Iterate measurements in the FROM clause getting the fields & dimensions for each.
	for _, src := range stmt.Sources {
		if m, ok := src.(*influxql.Measurement); ok {
			// Lookup the measurement in the database.
			mm := lm.shard.index.Measurement(m.Name)
			if mm == nil {
				// This shard have never received data for the measurement. No Mapper
				// required.
				return stmt, nil
			}
			// Get the fields for this measurement.
			for _, name := range mm.FieldNames() {
				if _, ok := fieldSet[name]; ok {
					continue
				}
				fieldSet[name] = struct{}{}
				fields = append(fields, &influxql.Field{Expr: &influxql.VarRef{Val: name}})
			}
			// Get the dimensions for this measurement.
			for _, t := range mm.TagKeys() {
				if _, ok := dimensionSet[t]; ok {
					continue
				}
				dimensionSet[t] = struct{}{}
				dimensions = append(dimensions, &influxql.Dimension{Expr: &influxql.VarRef{Val: t}})
			}
		}
	}
	// Return a new SelectStatement with the wild cards rewritten.
	return stmt.RewriteWildcards(fields, dimensions), nil
}
Esempio n. 15
0
// Plan creates an execution plan for the given SelectStatement and returns an Executor.
func (q *QueryExecutor) PlanSelect(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) {
	shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers.

	// It is important to "stamp" this time so that everywhere we evaluate `now()` in the statement is EXACTLY the same `now`
	now := time.Now().UTC()

	// Replace instances of "now()" with the current time, and check the resultant times.
	stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: now})
	tmin, tmax := influxql.TimeRange(stmt.Condition)
	if tmax.IsZero() {
		tmax = now
	}
	if tmin.IsZero() {
		tmin = time.Unix(0, 0)
	}

	for _, src := range stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return nil, fmt.Errorf("invalid source type: %#v", src)
		}

		// Build the set of target shards. Using shard IDs as keys ensures each shard ID
		// occurs only once.
		shardGroups, err := q.MetaStore.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax)
		if err != nil {
			return nil, err
		}
		for _, g := range shardGroups {
			for _, sh := range g.Shards {
				shards[sh.ID] = sh
			}
		}
	}

	// Build the Mappers, one per shard.
	mappers := []Mapper{}
	for _, sh := range shards {
		m, err := q.ShardMapper.CreateMapper(sh, stmt, chunkSize)
		if err != nil {
			return nil, err
		}
		if m == nil {
			// No data for this shard, skip it.
			continue
		}
		mappers = append(mappers, m)
	}

	executor := NewSelectExecutor(stmt, mappers, chunkSize)
	return executor, nil
}
Esempio n. 16
0
// rewriteSelectStatement performs any necessary query re-writing.
func (q *QueryExecutor) rewriteSelectStatement(stmt *influxql.SelectStatement) (*influxql.SelectStatement, error) {
	var err error

	// Expand regex expressions in the FROM clause.
	sources, err := q.expandSources(stmt.Sources)
	if err != nil {
		return nil, err
	}
	stmt.Sources = sources

	// Expand wildcards in the fields or GROUP BY.
	if stmt.HasWildcard() {
		stmt, err = q.expandWildcards(stmt)
		if err != nil {
			return nil, err
		}
	}

	stmt.RewriteDistinct()

	return stmt, nil
}
Esempio n. 17
0
// derivativeInterval returns the time interval for the one (and only) derivative func
func derivativeInterval(stmt *influxql.SelectStatement) (time.Duration, error) {
	if len(stmt.FunctionCalls()[0].Args) == 2 {
		return stmt.FunctionCalls()[0].Args[1].(*influxql.DurationLiteral).Val, nil
	}
	interval, err := stmt.GroupByInterval()
	if err != nil {
		return 0, err
	}
	if interval > 0 {
		return interval, nil
	}
	return time.Second, nil
}
Esempio n. 18
0
// CreateMappers will create a set of mappers that need to be run to execute the map phase of a MapReduceJob.
func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) {
	jobs := []*influxql.MapReduceJob{}
	for _, src := range stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return nil, fmt.Errorf("invalid source type: %#v", src)
		}

		// get the index and the retention policy
		rp, err := tx.meta.RetentionPolicy(mm.Database, mm.RetentionPolicy)
		if err != nil {
			return nil, err
		}
		m := tx.store.Measurement(mm.Database, mm.Name)
		if m == nil {
			return nil, ErrMeasurementNotFound(influxql.QuoteIdent([]string{mm.Database, "", mm.Name}...))
		}

		tx.measurement = m

		// Validate the fields and tags asked for exist and keep track of which are in the select vs the where
		var selectFields []string
		var whereFields []string
		var selectTags []string

		for _, n := range stmt.NamesInSelect() {
			if m.HasField(n) {
				selectFields = append(selectFields, n)
				continue
			}
			if !m.HasTagKey(n) {
				return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
			}
			selectTags = append(selectTags, n)
			tagKeys = append(tagKeys, n)
		}
		for _, n := range stmt.NamesInWhere() {
			if n == "time" {
				continue
			}
			if m.HasField(n) {
				whereFields = append(whereFields, n)
				continue
			}
			if !m.HasTagKey(n) {
				return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
			}
		}

		if len(selectFields) == 0 && len(stmt.FunctionCalls()) == 0 {
			return nil, fmt.Errorf("select statement must include at least one field or function call")
		}

		// Validate that group by is not a field
		for _, d := range stmt.Dimensions {
			switch e := d.Expr.(type) {
			case *influxql.VarRef:
				if !m.HasTagKey(e.Val) {
					return nil, fmt.Errorf("can not use field in group by clause: %s", e.Val)
				}
			}
		}

		// Grab time range from statement.
		tmin, tmax := influxql.TimeRange(stmt.Condition)
		if tmax.IsZero() {
			tmax = tx.now
		}
		if tmin.IsZero() {
			tmin = time.Unix(0, 0)
		}

		// Find shard groups within time range.
		var shardGroups []*meta.ShardGroupInfo
		for _, group := range rp.ShardGroups {
			if group.Overlaps(tmin, tmax) {
				g := group
				shardGroups = append(shardGroups, &g)
			}
		}
		if len(shardGroups) == 0 {
			return nil, nil
		}

		// get the group by interval, if there is one
		var interval int64
		if d, err := stmt.GroupByInterval(); err != nil {
			return nil, err
		} else {
			interval = d.Nanoseconds()
		}

		// get the sorted unique tag sets for this query.
		tagSets, err := m.TagSets(stmt, tagKeys)
		if err != nil {
			return nil, err
		}

		for _, t := range tagSets {
			// make a job for each tagset
			job := &influxql.MapReduceJob{
				MeasurementName: m.Name,
				TagSet:          t,
				TMin:            tmin.UnixNano(),
				TMax:            tmax.UnixNano(),
			}

			// make a mapper for each shard that must be hit. We may need to hit multiple shards within a shard group
			var mappers []influxql.Mapper

			// create mappers for each shard we need to hit
			for _, sg := range shardGroups {
				// TODO: implement distributed queries
				if len(sg.Shards) != 1 {
					return nil, fmt.Errorf("distributed queries aren't supported yet. You have a replication policy with RF < # of servers in cluster")
				}
				shard := tx.store.Shard(sg.Shards[0].ID)
				if shard == nil {
					// the store returned nil which means we haven't written any data into this shard yet, so ignore it
					continue
				}

				// get the codec for this measuremnt. If this is nil it just means this measurement was
				// never written into this shard, so we can skip it and continue.
				codec := shard.FieldCodec(m.Name)
				if codec == nil {
					continue
				}

				var mapper influxql.Mapper

				mapper = &LocalMapper{
					seriesKeys:   t.SeriesKeys,
					shard:        shard,
					db:           shard.DB(),
					job:          job,
					decoder:      codec,
					filters:      t.Filters,
					whereFields:  whereFields,
					selectFields: selectFields,
					selectTags:   selectTags,
					tmin:         tmin.UnixNano(),
					tmax:         tmax.UnixNano(),
					interval:     interval,
					// multiple mappers may need to be merged together to get the results
					// for a raw query. So each mapper will have to read at least the
					// limit plus the offset in data points to ensure we've hit our mark
					limit: uint64(stmt.Limit) + uint64(stmt.Offset),
				}

				mappers = append(mappers, mapper)
			}

			job.Mappers = mappers

			jobs = append(jobs, job)
		}
	}

	// always return them in sorted order so the results from running the jobs are returned in a deterministic order
	sort.Sort(influxql.MapReduceJobs(jobs))
	return jobs, nil
}
Esempio n. 19
0
// Plan creates an execution plan for the given SelectStatement and returns an Executor.
func (q *QueryExecutor) PlanSelect(stmt *influxql.SelectStatement, chunkSize int) (Executor, error) {
	var shardIDs []uint64
	shards := map[uint64]meta.ShardInfo{} // Shards requiring mappers.

	// It is important to "stamp" this time so that everywhere we evaluate `now()` in the statement is EXACTLY the same `now`
	now := time.Now().UTC()

	// Replace instances of "now()" with the current time, and check the resultant times.
	stmt.Condition = influxql.Reduce(stmt.Condition, &influxql.NowValuer{Now: now})
	tmin, tmax := influxql.TimeRange(stmt.Condition)
	if tmax.IsZero() {
		tmax = now
	}
	if tmin.IsZero() {
		tmin = time.Unix(0, 0)
	}

	for _, src := range stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return nil, fmt.Errorf("invalid source type: %#v", src)
		}

		// Build the set of target shards. Using shard IDs as keys ensures each shard ID
		// occurs only once.
		shardGroups, err := q.MetaClient.ShardGroupsByTimeRange(mm.Database, mm.RetentionPolicy, tmin, tmax)
		if err != nil {
			return nil, err
		}
		for _, g := range shardGroups {
			for _, sh := range g.Shards {
				if _, ok := shards[sh.ID]; !ok {
					shards[sh.ID] = sh
					shardIDs = append(shardIDs, sh.ID)
				}
			}
		}
	}

	// Sort shard IDs to make testing deterministic.
	sort.Sort(uint64Slice(shardIDs))

	// Build the Mappers, one per shard.
	mappers := []Mapper{}
	for _, shardID := range shardIDs {
		sh := shards[shardID]

		m, err := q.ShardMapper.CreateMapper(sh, stmt, chunkSize)
		if err != nil {
			return nil, err
		}
		if m == nil {
			// No data for this shard, skip it.
			continue
		}
		mappers = append(mappers, m)
	}

	// Certain operations on the SELECT statement can be performed by the AggregateExecutor without
	// assistance from the Mappers. This allows the AggregateExecutor to prepare aggregation functions
	// and mathematical functions.
	stmt.RewriteDistinct()

	if (stmt.IsRawQuery && !stmt.HasDistinct()) || stmt.IsSimpleDerivative() {
		return NewRawExecutor(stmt, mappers, chunkSize), nil
	} else {
		return NewAggregateExecutor(stmt, mappers), nil
	}
}
Esempio n. 20
0
// CreateMappers will create a set of mappers that need to be run to execute the map phase of a MapReduceJob.
func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) {
	// Parse the source segments.
	database, policyName, measurement, err := splitIdent(stmt.Source.(*influxql.Measurement).Name)
	if err != nil {
		return nil, err
	}

	// Find database and retention policy.
	db := tx.server.databases[database]
	if db == nil {
		return nil, ErrDatabaseNotFound
	}
	rp := db.policies[policyName]
	if rp == nil {
		return nil, ErrRetentionPolicyNotFound
	}

	// Find measurement.
	m, err := tx.server.measurement(database, measurement)
	if err != nil {
		return nil, err
	}
	if m == nil {
		return nil, ErrMeasurementNotFound
	}

	tx.measurement = m
	tx.decoder = NewFieldCodec(m)

	// Validate the fields and tags asked for exist and keep track of which are in the select vs the where
	var selectFields []*Field
	var whereFields []*Field
	var selectTags []string

	for _, n := range stmt.NamesInSelect() {
		f := m.FieldByName(n)
		if f != nil {
			selectFields = append(selectFields, f)
			continue
		}
		if !m.HasTagKey(n) {
			return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
		}
		selectTags = append(selectTags, n)
	}
	for _, n := range stmt.NamesInWhere() {
		if n == "time" {
			continue
		}
		f := m.FieldByName(n)
		if f != nil {
			whereFields = append(whereFields, f)
			continue
		}
		if !m.HasTagKey(n) {
			return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
		}
	}

	// Grab time range from statement.
	tmin, tmax := influxql.TimeRange(stmt.Condition)
	if tmax.IsZero() {
		tmax = tx.now
	}
	if tmin.IsZero() {
		tmin = time.Unix(0, 0)
	}

	// Find shard groups within time range.
	var shardGroups []*ShardGroup
	for _, group := range rp.shardGroups {
		if group.Contains(tmin, tmax) {
			shardGroups = append(shardGroups, group)
		}
	}
	if len(shardGroups) == 0 {
		return nil, nil
	}

	// get the sorted unique tag sets for this query.
	tagSets := m.tagSets(stmt, tagKeys)

	jobs := make([]*influxql.MapReduceJob, 0, len(tagSets))
	for _, t := range tagSets {
		// make a job for each tagset
		job := &influxql.MapReduceJob{
			MeasurementName: m.Name,
			TagSet:          t,
			TMin:            tmin.UnixNano(),
			TMax:            tmax.UnixNano(),
		}

		// make a mapper for each shard that must be hit. We may need to hit multiple shards within a shard group
		mappers := make([]influxql.Mapper, 0)

		// create mappers for each shard we need to hit
		for _, sg := range shardGroups {
			if len(sg.Shards) != 1 { // we'll only have more than 1 shard in a group when RF < # servers in cluster
				// TODO: implement distributed queries.
				panic("distributed queries not implemented yet and there are too many shards in this group")
			}

			shard := sg.Shards[0]
			mapper := &LocalMapper{
				seriesIDs:    t.SeriesIDs,
				db:           shard.store,
				job:          job,
				decoder:      NewFieldCodec(m),
				filters:      t.Filters,
				whereFields:  whereFields,
				selectFields: selectFields,
				selectTags:   selectTags,
			}

			mappers = append(mappers, mapper)
		}

		job.Mappers = mappers

		jobs = append(jobs, job)
	}

	// always return them in sorted order so the results from running the jobs are returned in a deterministic order
	sort.Sort(influxql.MapReduceJobs(jobs))
	return jobs, nil
}