Example #1
0
// CreateMappers will create a set of mappers that need to be run to execute the map phase of a MapReduceJob.
func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) {
	jobs := []*influxql.MapReduceJob{}
	for _, src := range stmt.Sources {
		mm, ok := src.(*influxql.Measurement)
		if !ok {
			return nil, fmt.Errorf("invalid source type: %#v", src)
		}

		// get the index and the retention policy
		rp, err := tx.meta.RetentionPolicy(mm.Database, mm.RetentionPolicy)
		if err != nil {
			return nil, err
		}
		m := tx.store.Measurement(mm.Database, mm.Name)
		if m == nil {
			return nil, ErrMeasurementNotFound(influxql.QuoteIdent([]string{mm.Database, "", mm.Name}...))
		}

		tx.measurement = m

		// Validate the fields and tags asked for exist and keep track of which are in the select vs the where
		var selectFields []string
		var whereFields []string
		var selectTags []string

		for _, n := range stmt.NamesInSelect() {
			if m.HasField(n) {
				selectFields = append(selectFields, n)
				continue
			}
			if !m.HasTagKey(n) {
				return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
			}
			selectTags = append(selectTags, n)
			tagKeys = append(tagKeys, n)
		}
		for _, n := range stmt.NamesInWhere() {
			if n == "time" {
				continue
			}
			if m.HasField(n) {
				whereFields = append(whereFields, n)
				continue
			}
			if !m.HasTagKey(n) {
				return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
			}
		}

		if len(selectFields) == 0 && len(stmt.FunctionCalls()) == 0 {
			return nil, fmt.Errorf("select statement must include at least one field or function call")
		}

		// Validate that group by is not a field
		for _, d := range stmt.Dimensions {
			switch e := d.Expr.(type) {
			case *influxql.VarRef:
				if !m.HasTagKey(e.Val) {
					return nil, fmt.Errorf("can not use field in group by clause: %s", e.Val)
				}
			}
		}

		// Grab time range from statement.
		tmin, tmax := influxql.TimeRange(stmt.Condition)
		if tmax.IsZero() {
			tmax = tx.now
		}
		if tmin.IsZero() {
			tmin = time.Unix(0, 0)
		}

		// Find shard groups within time range.
		var shardGroups []*meta.ShardGroupInfo
		for _, group := range rp.ShardGroups {
			if group.Overlaps(tmin, tmax) {
				g := group
				shardGroups = append(shardGroups, &g)
			}
		}
		if len(shardGroups) == 0 {
			return nil, nil
		}

		// get the group by interval, if there is one
		var interval int64
		if d, err := stmt.GroupByInterval(); err != nil {
			return nil, err
		} else {
			interval = d.Nanoseconds()
		}

		// get the sorted unique tag sets for this query.
		tagSets, err := m.TagSets(stmt, tagKeys)
		if err != nil {
			return nil, err
		}

		for _, t := range tagSets {
			// make a job for each tagset
			job := &influxql.MapReduceJob{
				MeasurementName: m.Name,
				TagSet:          t,
				TMin:            tmin.UnixNano(),
				TMax:            tmax.UnixNano(),
			}

			// make a mapper for each shard that must be hit. We may need to hit multiple shards within a shard group
			var mappers []influxql.Mapper

			// create mappers for each shard we need to hit
			for _, sg := range shardGroups {
				// TODO: implement distributed queries
				if len(sg.Shards) != 1 {
					return nil, fmt.Errorf("distributed queries aren't supported yet. You have a replication policy with RF < # of servers in cluster")
				}
				shard := tx.store.Shard(sg.Shards[0].ID)
				if shard == nil {
					// the store returned nil which means we haven't written any data into this shard yet, so ignore it
					continue
				}

				// get the codec for this measuremnt. If this is nil it just means this measurement was
				// never written into this shard, so we can skip it and continue.
				codec := shard.FieldCodec(m.Name)
				if codec == nil {
					continue
				}

				var mapper influxql.Mapper

				mapper = &LocalMapper{
					seriesKeys:   t.SeriesKeys,
					shard:        shard,
					db:           shard.DB(),
					job:          job,
					decoder:      codec,
					filters:      t.Filters,
					whereFields:  whereFields,
					selectFields: selectFields,
					selectTags:   selectTags,
					tmin:         tmin.UnixNano(),
					tmax:         tmax.UnixNano(),
					interval:     interval,
					// multiple mappers may need to be merged together to get the results
					// for a raw query. So each mapper will have to read at least the
					// limit plus the offset in data points to ensure we've hit our mark
					limit: uint64(stmt.Limit) + uint64(stmt.Offset),
				}

				mappers = append(mappers, mapper)
			}

			job.Mappers = mappers

			jobs = append(jobs, job)
		}
	}

	// always return them in sorted order so the results from running the jobs are returned in a deterministic order
	sort.Sort(influxql.MapReduceJobs(jobs))
	return jobs, nil
}
Example #2
0
// CreateMappers will create a set of mappers that need to be run to execute the map phase of a MapReduceJob.
func (tx *tx) CreateMapReduceJobs(stmt *influxql.SelectStatement, tagKeys []string) ([]*influxql.MapReduceJob, error) {
	// Parse the source segments.
	database, policyName, measurement, err := splitIdent(stmt.Source.(*influxql.Measurement).Name)
	if err != nil {
		return nil, err
	}

	// Find database and retention policy.
	db := tx.server.databases[database]
	if db == nil {
		return nil, ErrDatabaseNotFound
	}
	rp := db.policies[policyName]
	if rp == nil {
		return nil, ErrRetentionPolicyNotFound
	}

	// Find measurement.
	m, err := tx.server.measurement(database, measurement)
	if err != nil {
		return nil, err
	}
	if m == nil {
		return nil, ErrMeasurementNotFound
	}

	tx.measurement = m
	tx.decoder = NewFieldCodec(m)

	// Validate the fields and tags asked for exist and keep track of which are in the select vs the where
	var selectFields []*Field
	var whereFields []*Field
	var selectTags []string

	for _, n := range stmt.NamesInSelect() {
		f := m.FieldByName(n)
		if f != nil {
			selectFields = append(selectFields, f)
			continue
		}
		if !m.HasTagKey(n) {
			return nil, fmt.Errorf("unknown field or tag name in select clause: %s", n)
		}
		selectTags = append(selectTags, n)
	}
	for _, n := range stmt.NamesInWhere() {
		if n == "time" {
			continue
		}
		f := m.FieldByName(n)
		if f != nil {
			whereFields = append(whereFields, f)
			continue
		}
		if !m.HasTagKey(n) {
			return nil, fmt.Errorf("unknown field or tag name in where clause: %s", n)
		}
	}

	// Grab time range from statement.
	tmin, tmax := influxql.TimeRange(stmt.Condition)
	if tmax.IsZero() {
		tmax = tx.now
	}
	if tmin.IsZero() {
		tmin = time.Unix(0, 0)
	}

	// Find shard groups within time range.
	var shardGroups []*ShardGroup
	for _, group := range rp.shardGroups {
		if group.Contains(tmin, tmax) {
			shardGroups = append(shardGroups, group)
		}
	}
	if len(shardGroups) == 0 {
		return nil, nil
	}

	// get the sorted unique tag sets for this query.
	tagSets := m.tagSets(stmt, tagKeys)

	jobs := make([]*influxql.MapReduceJob, 0, len(tagSets))
	for _, t := range tagSets {
		// make a job for each tagset
		job := &influxql.MapReduceJob{
			MeasurementName: m.Name,
			TagSet:          t,
			TMin:            tmin.UnixNano(),
			TMax:            tmax.UnixNano(),
		}

		// make a mapper for each shard that must be hit. We may need to hit multiple shards within a shard group
		mappers := make([]influxql.Mapper, 0)

		// create mappers for each shard we need to hit
		for _, sg := range shardGroups {
			if len(sg.Shards) != 1 { // we'll only have more than 1 shard in a group when RF < # servers in cluster
				// TODO: implement distributed queries.
				panic("distributed queries not implemented yet and there are too many shards in this group")
			}

			shard := sg.Shards[0]
			mapper := &LocalMapper{
				seriesIDs:    t.SeriesIDs,
				db:           shard.store,
				job:          job,
				decoder:      NewFieldCodec(m),
				filters:      t.Filters,
				whereFields:  whereFields,
				selectFields: selectFields,
				selectTags:   selectTags,
			}

			mappers = append(mappers, mapper)
		}

		job.Mappers = mappers

		jobs = append(jobs, job)
	}

	// always return them in sorted order so the results from running the jobs are returned in a deterministic order
	sort.Sort(influxql.MapReduceJobs(jobs))
	return jobs, nil
}