Beispiel #1
0
// MergeSeries merges two time series that have the same name. The
// columns of the result are the union of the columns of both series and
// every point has a value for each column; a column that a point didn't
// have is filled with a FieldValue whose IsNull flag is set.
//
// If both series have the same columns in the same order, the points of
// s2 are appended to s1 in place and s1 itself is returned. Otherwise a
// new series is returned: its columns are the columns of s1 followed by
// the columns that only appear in s2, and its points are the points of
// s1 followed by the points of s2, each keeping its original timestamp
// and sequence number.
//
// MergeSeries panics if the two series don't have the same name.
func MergeSeries(s1, s2 *protocol.Series) *protocol.Series {
	if s1.GetName() != s2.GetName() {
		panic("the two series don't have the same name")
	}

	// if the two series have the same columns and in the same order
	// append the points and return.
	if reflect.DeepEqual(s1.Fields, s2.Fields) {
		s1.Points = append(s1.Points, s2.Points...)
		return s1
	}

	// otherwise, merge the columns. The union is built in order of first
	// appearance so that the column order of the result is deterministic;
	// collecting the names by ranging over a map would shuffle them on
	// every call.
	maxColumns := len(s1.Fields) + len(s2.Fields)
	seen := make(map[string]struct{}, maxColumns)
	fieldsSlice := make([]string, 0, maxColumns)
	for _, cs := range [][]string{s1.Fields, s2.Fields} {
		for _, c := range cs {
			if _, ok := seen[c]; ok {
				continue
			}
			seen[c] = struct{}{}
			fieldsSlice = append(fieldsSlice, c)
		}
	}

	// NOTE(review): this relies on pointMaps returning one column name
	// to value map per point, in the same order as the series' points -
	// confirm against pointMaps.
	points := append(pointMaps(s1), pointMaps(s2)...)

	resultPoints := make([]*protocol.Point, 0, len(points))
	for idx, point := range points {
		// the first len(s1.Points) maps belong to s1, the rest to s2
		var original *protocol.Point
		if idx < len(s1.Points) {
			original = s1.Points[idx]
		} else {
			original = s2.Points[idx-len(s1.Points)]
		}

		// copy the point metadata once per point and independently of
		// the columns, so it is set even when the merged series ends up
		// with no columns at all (e.g. nil fields merged with empty
		// fields, which DeepEqual reports as different)
		resultPoint := &protocol.Point{
			Values:         make([]*protocol.FieldValue, 0, len(fieldsSlice)),
			Timestamp:      original.Timestamp,
			SequenceNumber: original.SequenceNumber,
		}

		for _, field := range fieldsSlice {
			value := point[field]
			if value == nil {
				// the point has no value for this column
				value = &protocol.FieldValue{
					IsNull: &TRUE,
				}
			}
			resultPoint.Values = append(resultPoint.Values, value)
		}
		resultPoints = append(resultPoints, resultPoint)
	}

	return &protocol.Series{
		Name:   s1.Name,
		Fields: fieldsSlice,
		Points: resultPoints,
	}
}
Beispiel #2
0
// Yield renames the given series to the name of the merge engine and
// keeps track of where the points came from: a "_orig_series" column is
// added to the series and every point gets the original series name as
// the value of that column. The modified series is then handed to the
// next processor, whose result is returned unchanged.
func (me *MergeEngine) Yield(s *protocol.Series) (bool, error) {
	// remember the name before it is overwritten
	originalName := s.Name

	s.Name = &me.name
	s.Fields = append(s.Fields, "_orig_series")

	for i := range s.Points {
		origin := &protocol.FieldValue{StringValue: originalName}
		s.Points[i].Values = append(s.Points[i].Values, origin)
	}

	return me.next.Yield(s)
}
Beispiel #3
0
// Filter applies the where condition of the query to the given series.
// The series is modified in place and returned: only the points that
// match the condition are kept and, unless the query selects '*', the
// fields of the series are reduced to the selected columns.
//
// If the query has no where condition the series is returned untouched.
// If evaluating the condition fails, nil and the error are returned; at
// that point series.Points only holds the points matched so far.
func Filter(query *parser.SelectQuery, series *protocol.Series) (*protocol.Series, error) {
	condition := query.GetWhereCondition()
	if condition == nil {
		return series, nil
	}

	// collect the names of the selected columns. In a join the columns
	// are qualified with the name of the table they belong to.
	isJoin := query.GetFromClause().Type == parser.FromClauseInnerJoin
	selected := map[string]struct{}{}
collect:
	for table, names := range query.GetResultColumns() {
		for _, name := range names {
			switch {
			case !isJoin:
				selected[name] = struct{}{}
			case name == "*":
				// a wildcard select in a join overrides every other
				// column, keep '*' only and stop looking
				selected = map[string]struct{}{name: {}}
				break collect
			default:
				selected[table.Name+"."+name] = struct{}{}
			}
		}
	}

	// rebuild the list of points with the matching points only
	candidates := series.Points
	series.Points = nil
	for _, point := range candidates {
		matched, err := matches(condition, series.Fields, point)
		if err != nil {
			return nil, err
		}
		if !matched {
			continue
		}

		// NOTE(review): presumably drops the values of the columns
		// that weren't selected from the point - confirm against
		// filterColumns
		filterColumns(selected, series.Fields, point)
		series.Points = append(series.Points, point)
	}

	// with a wildcard all the fields are kept
	if _, wildcard := selected["*"]; wildcard {
		return series, nil
	}

	kept := []string{}
	for _, field := range series.Fields {
		if _, ok := selected[field]; ok {
			kept = append(kept, field)
		}
	}
	series.Fields = kept
	return series, nil
}
Beispiel #4
0
// yield buffers the given series in memSeries, keyed by the name of the
// series. The first series seen for a name is stored as is, any later
// series with the same name is merged into the stored one using
// MergeSeries. It always returns nil.
func (self *AllPointsWriter) yield(series *protocol.Series) error {
	// use the getter for every access to the map so that the key is
	// always computed the same way and a series without a name doesn't
	// cause a nil pointer dereference
	name := series.GetName()

	if oldSeries := self.memSeries[name]; oldSeries != nil {
		self.memSeries[name] = MergeSeries(oldSeries, series)
		return nil
	}

	self.memSeries[name] = series
	return nil
}
Beispiel #5
0
// Yield records the last point of the given series as the latest point
// of the table the series belongs to. Once every table being joined has
// a point, a single point holding the values of all the tables is
// built, run through the where clause of the query and, if it matches,
// passed to the next processor.
//
// It returns true and nil when there's nothing to emit (the series has
// no points, not all tables have a point yet, or the joined point was
// filtered out), false and the error if filtering fails, and otherwise
// whatever the next processor returns.
func (je *JoinEngine) Yield(s *protocol.Series) (bool, error) {
	log4go.Fine("JoinEngine.Yield(): %s", s)

	// a series without points doesn't change the state of the join,
	// and taking its last point below would panic
	if len(s.Points) == 0 {
		return true, nil
	}

	idx := je.tableIdx[s.GetName()]
	state := &je.tablesState[idx]
	// If the state for this table didn't contain a point already,
	// increment the number of tables ready to emit a point by
	// incrementing `pts`
	if state.lastPoint == nil {
		je.pts++
	}
	state.lastPoint = s.Points[len(s.Points)-1]
	// update the fields for this table. the fields shouldn't change
	// after the first point, so we only need to set them once
	if state.lastFields == nil {
		for _, f := range s.Fields {
			state.lastFields = append(state.lastFields, s.GetName()+"."+f)
		}
	}

	log4go.Fine("JoinEngine: pts = %d", je.pts)
	// if the number of tables ready to emit a point isn't equal to the
	// total number of tables being joined, then return
	if je.pts != len(je.tablesState) {
		return true, nil
	}

	// we arbitrarily use the timestamp of the first table's point as
	// the timestamp of the resulting point. maybe we should use the
	// smallest (or largest) timestamp.
	ts := je.tablesState[0].lastPoint.Timestamp
	newSeries := &protocol.Series{
		Name:   &je.name,
		Fields: je.fields(),
		Points: []*protocol.Point{
			{
				Timestamp: ts,
				Values:    je.values(),
			},
		},
	}

	// filter the point. the user may have a where clause with the join,
	// e.g. `select * from join(foo1, foo2) where foo1.val > 10`. we
	// can't evaluate the where clause until after join happens
	filteredSeries, err := Filter(je.query, newSeries)
	if err != nil {
		return false, err
	}

	if len(filteredSeries.Points) == 0 {
		return true, nil
	}

	// forward the series returned by Filter rather than the series that
	// was passed in, so this doesn't depend on Filter working in place
	return je.next.Yield(filteredSeries)
}
Beispiel #6
0
// calculateLimitAndSlicePoints trims the points of the given series to
// the number of points that the series is still allowed to return and
// stores the remaining limit of the series in self.limits. It does
// nothing if the limiter isn't limiting.
func (self *Limiter) calculateLimitAndSlicePoints(series *protocol.Series) {
	if !self.shouldLimit {
		return
	}

	name := *series.Name
	remaining := self.limitForSeries(name)
	// whatever happens below, remember what is left for this series
	defer func() { self.limits[name] = remaining }()

	// the limit was reached already, stop returning any points
	if remaining == 0 {
		series.Points = nil
		return
	}

	count := len(series.Points)
	remaining -= count
	if remaining > 0 {
		// all the points fit within the limit
		return
	}

	// remaining is zero or negative here, i.e. count+remaining is the
	// limit the series had on entry; keep that many points and mark
	// the limit as exhausted
	series.Points = series.Points[:count+remaining]
	remaining = 0
}
Beispiel #7
0
// targetNameFieldPattern matches the `[field]` placeholders that can be
// used in the target name of a continuous query. The pattern is
// constant, so it is compiled once instead of on every call.
var targetNameFieldPattern = regexp.MustCompile(`\[.*?\]`)

// InterpolateValuesAndCommit writes the points of the given series to
// one or more series whose names are derived from targetName, using
// CommitSeriesData.
//
// Every occurrence of ":series_name" in targetName is replaced with the
// name of the series. Every `[field]` placeholder is replaced, for each
// point, with the value that the point has for that field; points that
// end up with the same name are grouped in the same series and the
// fields used in the target name are removed from the resulting fields
// and values. Without placeholders all the points are written to a
// single series named targetName; the points aren't copied in that
// case.
//
// If assignSequenceNumbers is true, the points get new sequence
// numbers, counting from 1 for each combination of resulting series
// name and timestamp. In the case without placeholders this overwrites
// the sequence numbers of the points of the given series.
//
// Failing to write the data is logged and not returned, the function
// always returns nil.
func (self *Coordinator) InterpolateValuesAndCommit(query string, db string, series *protocol.Series, targetName string, assignSequenceNumbers bool) error {
	// NOTE(review): presumably recovers from panics raised below -
	// confirm against common.RecoverFunc
	defer common.RecoverFunc(db, query, nil)

	targetName = strings.Replace(targetName, ":series_name", *series.Name, -1)
	type sequenceKey struct {
		seriesName string
		timestamp  int64
	}
	sequenceMap := make(map[sequenceKey]int)

	// get the fields that are used in the target name
	fieldsInTargetName := targetNameFieldPattern.FindAllString(targetName, -1)
	fieldsIndeces := make([]int, 0, len(fieldsInTargetName))
	for _, f := range fieldsInTargetName {
		// strip the surrounding brackets to get the field name
		fieldsIndeces = append(fieldsIndeces, series.GetFieldIndex(f[1:len(f)-1]))
	}

	// usedInTargetName reports whether the field at the given index is
	// one of the fields that are interpolated in the target name
	usedInTargetName := func(index int) bool {
		for _, fi := range fieldsIndeces {
			if fi == index {
				return true
			}
		}
		return false
	}

	// remove the fields used in the target name from the series fields.
	// The capacity is an upper bound: subtracting the number of
	// placeholders could go negative (and make would panic) when a
	// placeholder is repeated or there are more placeholders than fields.
	fields := make([]string, 0, len(series.Fields))
	for i, f := range series.Fields {
		if usedInTargetName(i) {
			continue
		}
		fields = append(fields, f)
	}

	// without placeholders all the points go to a single series
	if len(fieldsInTargetName) == 0 {
		newSeries := &protocol.Series{Name: &targetName, Fields: fields, Points: series.Points}

		if assignSequenceNumbers {
			for _, point := range newSeries.Points {
				key := sequenceKey{targetName, *point.Timestamp}
				sequenceMap[key]++
				sequenceNumber := uint64(sequenceMap[key])
				point.SequenceNumber = &sequenceNumber
			}
		}

		if e := self.CommitSeriesData(db, []*protocol.Series{newSeries}, true); e != nil {
			log.Error("Couldn't write data for continuous query: ", e)
		}
		return nil
	}

	// group the points by the name that results from interpolating
	// their values in the target name
	serieses := map[string]*protocol.Series{}
	for _, point := range series.Points {
		// the placeholders are replaced in order of appearance, which
		// is the order in which their field indices were collected
		fieldIndex := 0
		targetNameWithValues := targetNameFieldPattern.ReplaceAllStringFunc(targetName, func(_ string) string {
			value := point.GetFieldValueAsString(fieldsIndeces[fieldIndex])
			fieldIndex++
			return value
		})

		p := &protocol.Point{
			Values:         make([]*protocol.FieldValue, 0, len(point.Values)),
			Timestamp:      point.Timestamp,
			SequenceNumber: point.SequenceNumber,
		}

		// remove the values of the fields used in the target name
		for i, v := range point.Values {
			if usedInTargetName(i) {
				continue
			}
			p.Values = append(p.Values, v)
		}

		if assignSequenceNumbers {
			key := sequenceKey{targetNameWithValues, *p.Timestamp}
			sequenceMap[key]++
			sequenceNumber := uint64(sequenceMap[key])
			p.SequenceNumber = &sequenceNumber
		}

		newSeries := serieses[targetNameWithValues]
		if newSeries == nil {
			serieses[targetNameWithValues] = &protocol.Series{
				Name:   &targetNameWithValues,
				Fields: fields,
				Points: []*protocol.Point{p},
			}
			continue
		}
		newSeries.Points = append(newSeries.Points, p)
	}

	seriesSlice := make([]*protocol.Series, 0, len(serieses))
	for _, s := range serieses {
		seriesSlice = append(seriesSlice, s)
	}
	if e := self.CommitSeriesData(db, seriesSlice, true); e != nil {
		log.Error("Couldn't write data for continuous query: ", e)
	}

	return nil
}
Beispiel #8
0
// Yield sets the shard id of the given series to the id of this
// processor and passes the series to the next processor, returning
// whatever the next processor returns.
func (sip ShardIdInserterProcessor) Yield(s *protocol.Series) (bool, error) {
	// the series keeps a pointer to a copy of the id; the receiver is a
	// copy of the processor as well
	shardId := sip.id
	s.ShardId = &shardId

	return sip.next.Yield(s)
}
Beispiel #9
0
// Yield hands the given series to the stream registered for the shard
// id of the series and then asks the merger to update, returning the
// result of that update.
func (cme *CommonMergeEngine) Yield(s *protocol.Series) (bool, error) {
	log4go.Fine("CommonMergeEngine.Yield(): %s", s)

	shardId := s.GetShardId()
	cme.streams[shardId].Yield(s)

	return cme.merger.Update()
}
Beispiel #10
0
// aggregateValuesForSeries feeds the points of the given series to the
// aggregators, keeping the aggregation state of each group in the
// prefix tree of the series. It returns false and the error as soon as
// initializing an aggregator, computing a group by value or aggregating
// a point fails, otherwise true and nil.
//
// There are three types of queries:
//   1. time() without fill
//   2. time() with fill
//   3. no time()
//
// In case (1) the aggregates are flushed as soon as a new time bucket
// starts; the prefix tree only tracks the other group by columns, not
// the time bucket, and is reset once the series is yielded. In case
// (2) all the group by columns are tracked with the time as the last
// level of the prefix tree. At the end of the query we step through
// [start time, end time] in self.duration steps and read the state
// from the prefix tree, using default values for the groups that have
// no state. In case (3) the groups stay in the prefix tree and on
// close() we loop through the groups and flush their values with a
// timestamp equal to now()
func (self *AggregatorEngine) aggregateValuesForSeries(series *protocol.Series) (bool, error) {
	for _, aggregator := range self.aggregators {
		err := aggregator.InitializeFieldsMetadata(series)
		if err != nil {
			return false, err
		}
	}

	tableName := series.GetName()
	state := self.getSeriesState(tableName)
	pointsRange := state.pointsRange

	hasTimeBuckets := self.duration != nil
	// case (2): the time bucket is part of the group
	groupByTimestamp := hasTimeBuckets && self.isFillQuery
	// case (1): flush whenever the time bucket changes
	flushOnNewBucket := hasTimeBuckets && !self.isFillQuery

	// one slot per group by element, plus one for the timestamp if it
	// is part of the group. The slice is reused for every point.
	groupSize := len(self.elems)
	if groupByTimestamp {
		groupSize++
	}
	group := make([]*protocol.FieldValue, groupSize)

	for _, point := range series.Points {
		pointsRange.UpdateRange(point)

		if flushOnNewBucket {
			bucket := self.getTimestampFromPoint(point)
			// the point starts a new bucket, flush the previous one
			if state.started && state.lastTimestamp != bucket {
				self.runAggregatesForTable(tableName)
			}
			state.lastTimestamp = bucket
			state.started = true
		}

		// find the group that the point belongs to
		//
		// TODO: create an index from fieldname to index
		//
		// TODO: We shouldn't rely on GetValue() to do arithmetic
		// operations. Instead we should cascade the arithmetic engine
		// with the aggregator engine and possibly add another
		// arithmetic engine to be able to do arithmetics on the
		// resulting aggregated data.
		for i, elem := range self.elems {
			fieldValue, err := GetValue(elem, series.Fields, point)
			if err != nil {
				return false, err
			}
			group[i] = fieldValue
		}

		// for a fill() query the timestamp goes in the last slot
		if groupByTimestamp {
			bucket := self.getTimestampFromPoint(point)
			group[groupSize-1] = &protocol.FieldValue{Int64Value: protocol.Int64(bucket)}
		}

		// update the state of the group with this point
		node := state.trie.GetNode(group)
		log4go.Debug("Aggregating for group %v", group)
		for idx, aggregator := range self.aggregators {
			log4go.Debug("Aggregating value for %T for group %v and state %v", aggregator, group, node.states[idx])
			updated, err := aggregator.AggregatePoint(node.states[idx], point)
			// the state is stored even if aggregating failed
			node.states[idx] = updated
			if err != nil {
				return false, err
			}
		}
	}

	return true, nil
}