// Yield accumulates incoming series, flushing the buffered series to
// the next processor whenever the series name changes or the buffer
// exceeds maxPointsInResponse; consecutive series with the same name
// are merged.
func (self *Passthrough) Yield(seriesIncoming *protocol.Series) (bool, error) {
    log.Debug("PassthroughEngine YieldSeries %d", len(seriesIncoming.Points))

    self.limiter.calculateLimitAndSlicePoints(seriesIncoming)
    if len(seriesIncoming.Points) == 0 {
        return false, nil
    }

    if self.series == nil {
        self.series = seriesIncoming
    } else if self.series.GetName() != seriesIncoming.GetName() ||
        len(self.series.Points) > self.maxPointsInResponse {
        log.Debug("Yielding to %s: %s", self.next.Name(), self.series)
        ok, err := self.next.Yield(self.series)
        if !ok || err != nil {
            return ok, err
        }
        self.series = seriesIncoming
    } else {
        self.series = common.MergeSeries(self.series, seriesIncoming)
    }
    return !self.limiter.hitLimit(seriesIncoming.GetName()), nil
}
// YieldSeries records the series name and fields (so empty results
// still carry column metadata), then yields the points downstream.
// Returns false once the limiter's limit has been hit.
func (self *QueryEngine) YieldSeries(seriesIncoming *protocol.Series) (shouldContinue bool) {
    if self.explain {
        self.pointsRead += int64(len(seriesIncoming.Points))
    }
    seriesName := seriesIncoming.GetName()
    self.seriesToPoints[seriesName] = &protocol.Series{Name: &seriesName, Fields: seriesIncoming.Fields}
    return self.yieldSeriesData(seriesIncoming) && !self.limiter.hitLimit(seriesIncoming.GetName())
}
// MergeSeries merges two time series, making sure that the resulting
// series has the union of the two series' columns and that the
// values are set properly. Panics if the two series don't have the
// same name.
func MergeSeries(s1, s2 *protocol.Series) *protocol.Series {
    if s1.GetName() != s2.GetName() {
        panic("the two series don't have the same name")
    }

    // if the two series have the same columns in the same order,
    // append the points and return
    if reflect.DeepEqual(s1.Fields, s2.Fields) {
        s1.Points = append(s1.Points, s2.Points...)
        return s1
    }

    // otherwise, merge the columns: take the union of the two field
    // sets and fill in a null value for any field a point is missing
    columns := map[string]struct{}{}
    for _, cs := range [][]string{s1.Fields, s2.Fields} {
        for _, c := range cs {
            columns[c] = struct{}{}
        }
    }

    points := append(pointMaps(s1), pointMaps(s2)...)

    fieldsSlice := make([]string, 0, len(columns))
    for c := range columns {
        fieldsSlice = append(fieldsSlice, c)
    }

    resultPoints := make([]*protocol.Point, 0, len(points))
    for idx, point := range points {
        resultPoint := &protocol.Point{}
        for _, field := range fieldsSlice {
            value := point[field]
            if value == nil {
                value = &protocol.FieldValue{
                    IsNull: &TRUE,
                }
            }
            resultPoint.Values = append(resultPoint.Values, value)
        }
        // carry over the timestamp and sequence number from the
        // original point
        if idx < len(s1.Points) {
            resultPoint.Timestamp = s1.Points[idx].Timestamp
            resultPoint.SequenceNumber = s1.Points[idx].SequenceNumber
        } else {
            resultPoint.Timestamp = s2.Points[idx-len(s1.Points)].Timestamp
            resultPoint.SequenceNumber = s2.Points[idx-len(s1.Points)].SequenceNumber
        }
        resultPoints = append(resultPoints, resultPoint)
    }

    return &protocol.Series{
        Name:   s1.Name,
        Fields: fieldsSlice,
        Points: resultPoints,
    }
}
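// To make the column-union path concrete: merging a series with
// fields [a] into one with fields [b] produces the union of the
// columns, with IsNull filling the gaps. The function below is a
// minimal, hypothetical sketch (the name ExampleMergeSeries and the
// values are made up; protocol.Int64 and the protocol types are from
// this package):
func ExampleMergeSeries() {
    name := "cpu"
    s1 := &protocol.Series{
        Name:   &name,
        Fields: []string{"a"},
        Points: []*protocol.Point{{
            Timestamp: protocol.Int64(1),
            Values:    []*protocol.FieldValue{{Int64Value: protocol.Int64(10)}},
        }},
    }
    s2 := &protocol.Series{
        Name:   &name,
        Fields: []string{"b"},
        Points: []*protocol.Point{{
            Timestamp: protocol.Int64(2),
            Values:    []*protocol.FieldValue{{Int64Value: protocol.Int64(20)}},
        }},
    }
    merged := MergeSeries(s1, s2)
    // merged.Fields is some ordering of ["a", "b"] (map iteration
    // order is unspecified); each point keeps its original timestamp
    // and carries one real value plus one value with IsNull set.
    _ = merged
}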
// Yield renames the series to the merge target name and appends an
// "_orig_series" column recording which series each point came from.
func (me *MergeEngine) Yield(s *protocol.Series) (bool, error) {
    oldName := s.Name
    s.Name = &me.name
    s.Fields = append(s.Fields, "_orig_series")
    for _, p := range s.Points {
        p.Values = append(p.Values, &protocol.FieldValue{StringValue: oldName})
    }
    return me.next.Yield(s)
}
// Filter applies the query's where clause to the given series,
// dropping points that don't match and pruning columns that aren't
// in the result set.
func Filter(query *parser.SelectQuery, series *protocol.Series) (*protocol.Series, error) {
    if query.GetWhereCondition() == nil {
        return series, nil
    }

    columns := map[string]struct{}{}
    if query.GetFromClause().Type == parser.FromClauseInnerJoin {
    outer:
        for t, cs := range query.GetResultColumns() {
            for _, c := range cs {
                // if this is a wildcard select, drop all columns and
                // just use '*'
                if c == "*" {
                    columns = make(map[string]struct{}, 1)
                    columns[c] = struct{}{}
                    break outer
                }
                columns[t.Name+"."+c] = struct{}{}
            }
        }
    } else {
        for _, cs := range query.GetResultColumns() {
            for _, c := range cs {
                columns[c] = struct{}{}
            }
        }
    }

    points := series.Points
    series.Points = nil
    for _, point := range points {
        ok, err := matches(query.GetWhereCondition(), series.Fields, point)
        if err != nil {
            return nil, err
        }
        if ok {
            filterColumns(columns, series.Fields, point)
            series.Points = append(series.Points, point)
        }
    }

    if _, ok := columns["*"]; !ok {
        newFields := []string{}
        for _, f := range series.Fields {
            if _, ok := columns[f]; !ok {
                continue
            }
            newFields = append(newFields, f)
        }
        series.Fields = newFields
    }
    return series, nil
}
// yield buffers the series in memory, merging it with any
// previously buffered series of the same name.
func (self *AllPointsWriter) yield(series *protocol.Series) error {
    oldSeries := self.memSeries[series.GetName()]
    if oldSeries == nil {
        self.memSeries[series.GetName()] = series
        return nil
    }
    self.memSeries[series.GetName()] = MergeSeries(oldSeries, series)
    return nil
}
// Yield buffers the latest point from each joined table and emits a
// joined point once every table has contributed one.
func (je *JoinEngine) Yield(s *protocol.Series) (bool, error) {
    log4go.Fine("JoinEngine.Yield(): %s", s)
    idx := je.tableIdx[s.GetName()]
    state := &je.tablesState[idx]

    // if the state for this table didn't contain a point already,
    // increment the number of tables ready to emit a point
    if state.lastPoint == nil {
        je.pts++
    }
    state.lastPoint = s.Points[len(s.Points)-1]

    // update the fields for this table. the fields shouldn't change
    // after the first point, so we only need to set them once
    if state.lastFields == nil {
        for _, f := range s.Fields {
            state.lastFields = append(state.lastFields, s.GetName()+"."+f)
        }
    }

    log4go.Fine("JoinEngine: pts = %d", je.pts)

    // if the number of tables ready to emit a point isn't equal to
    // the total number of tables being joined, wait for more points
    if je.pts != len(je.tablesState) {
        return true, nil
    }

    // we arbitrarily use the timestamp of the first table's point as
    // the timestamp of the resulting point. maybe we should use the
    // smallest (or largest) timestamp instead
    ts := je.tablesState[0].lastPoint.Timestamp
    newSeries := &protocol.Series{
        Name:   &je.name,
        Fields: je.fields(),
        Points: []*protocol.Point{
            {
                Timestamp: ts,
                Values:    je.values(),
            },
        },
    }

    // filter the point. the user may have a where clause with the
    // join, e.g. `select * from join(foo1, foo2) where foo1.val > 10`.
    // we can't evaluate the where clause until after the join happens
    filteredSeries, err := Filter(je.query, newSeries)
    if err != nil {
        return false, err
    }

    if len(filteredSeries.Points) > 0 {
        return je.next.Yield(filteredSeries)
    }
    return true, nil
}
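// A joined row is emitted only once every table has contributed a
// fresh point. The toy program below mirrors that readiness counting
// with plain ints rather than the engine's types; the post-emit
// reset shown here is an assumption (the engine resets its per-table
// state when it builds the joined values).
package main

import "fmt"

func main() {
    // one slot per joined table; a row is emitted once all slots are
    // filled, then the slots and the readiness counter reset
    last := make([]*int, 2)
    ready := 0

    yield := func(table int, v int) {
        if last[table] == nil {
            ready++
        }
        last[table] = &v
        if ready == len(last) {
            fmt.Printf("emit row: foo1=%d foo2=%d\n", *last[0], *last[1])
            last[0], last[1] = nil, nil
            ready = 0
        }
    }

    yield(0, 10) // only foo1 is ready; nothing emitted
    yield(1, 20) // both ready; prints "emit row: foo1=10 foo2=20"
}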
// modify the series to have the union of the columns from all
// StreamQueries
func (cme *Merger) fixFields(s *protocol.Series) {
    if !cme.mergeColumns {
        return
    }

    idx := cme.lastStreamIdx
    mapping := cme.resultFieldsPerStream[idx]
    if mapping == nil {
        // build (and cache) the mapping from result-field position to
        // this stream's field position; -1 marks a missing column
        for _, f := range cme.resultFields {
            index := -1
            for i, sf := range s.Fields {
                if sf == f {
                    index = i
                    break
                }
            }
            mapping = append(mapping, index)
        }
        cme.resultFieldsPerStream[idx] = mapping
    }

    s.Fields = cme.resultFields
    p := s.Points[0]
    originalValues := p.Values
    p.Values = nil
    for _, i := range mapping {
        if i == -1 {
            p.Values = append(p.Values, nil)
            continue
        }
        p.Values = append(p.Values, originalValues[i])
    }
}
// calculateLimitAndSlicePoints slices the series' points down to the
// remaining limit for that series and decrements the remaining limit
// accordingly.
func (self *Limiter) calculateLimitAndSlicePoints(series *protocol.Series) {
    if !self.shouldLimit {
        return
    }

    limit := self.limitForSeries(*series.Name)
    defer func() { self.limits[*series.Name] = limit }()
    // if the limit is already 0, stop returning any points
    if limit == 0 {
        series.Points = nil
        return
    }
    limit -= len(series.Points)
    if limit <= 0 {
        // keep only as many points as the limit allows
        sliceTo := len(series.Points) + limit
        series.Points = series.Points[0:sliceTo]
        limit = 0
    }
}
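// To see the arithmetic: with 3 points remaining in the limit and 5
// incoming points, limit becomes 3 - 5 = -2, so sliceTo = 5 + (-2) =
// 3 and exactly the 3 allowed points survive. The standalone program
// below is a sketch of the same slicing rule; sliceToLimit is an
// illustrative name, not part of the engine.
package main

import "fmt"

// sliceToLimit mirrors the limiter's arithmetic: it returns the
// points that fit within the remaining limit and the new remaining
// limit.
func sliceToLimit(points []int, limit int) ([]int, int) {
    if limit == 0 {
        return nil, 0
    }
    limit -= len(points)
    if limit <= 0 {
        points = points[:len(points)+limit]
        limit = 0
    }
    return points, limit
}

func main() {
    pts, rem := sliceToLimit([]int{1, 2, 3, 4, 5}, 3)
    fmt.Println(pts, rem) // [1 2 3] 0
}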
// YieldSeries buffers incoming series into a response, flushing the
// response to responseChan whenever the series name changes or the
// response exceeds maxPointsInResponse.
func (self *PassthroughEngine) YieldSeries(seriesIncoming *protocol.Series) bool {
    log.Debug("PassthroughEngine YieldSeries %d", len(seriesIncoming.Points))
    if *seriesIncoming.Name == "explain query" {
        self.responseType = &explainQueryResponse
        log.Debug("Response Changed!")
    } else {
        self.responseType = &queryResponse
    }

    self.limiter.calculateLimitAndSlicePoints(seriesIncoming)
    if len(seriesIncoming.Points) == 0 {
        log.Debug("Not sent == 0")
        return false
    }

    if self.response == nil {
        self.response = &protocol.Response{
            Type:   self.responseType,
            Series: seriesIncoming,
        }
    } else if self.response.Series.GetName() != seriesIncoming.GetName() ||
        len(self.response.Series.Points) > self.maxPointsInResponse {
        self.responseChan <- self.response
        self.response = &protocol.Response{
            Type:   self.responseType,
            Series: seriesIncoming,
        }
    } else {
        self.response.Series = common.MergeSeries(self.response.Series, seriesIncoming)
    }

    return !self.limiter.hitLimit(seriesIncoming.GetName())
}
// Yield stamps the series with this processor's shard id before
// passing it on.
func (sip ShardIdInserterProcessor) Yield(s *protocol.Series) (bool, error) {
    s.ShardId = &sip.id
    return sip.next.Yield(s)
}
// InterpolateValuesAndCommit writes the series to the target series
// of a continuous query. The target name may contain ":series_name"
// (replaced with the source series name) and "[field]" placeholders
// (replaced per point with that field's value; the field is removed
// from the written series).
func (self *Coordinator) InterpolateValuesAndCommit(query string, db string, series *protocol.Series, targetName string, assignSequenceNumbers bool) error {
    defer common.RecoverFunc(db, query, nil)

    targetName = strings.Replace(targetName, ":series_name", *series.Name, -1)
    type sequenceKey struct {
        seriesName string
        timestamp  int64
    }
    sequenceMap := make(map[sequenceKey]int)
    r, _ := regexp.Compile(`\[.*?\]`)

    // get the fields that are used in the target name
    fieldsInTargetName := r.FindAllString(targetName, -1)
    fieldsIndices := make([]int, 0, len(fieldsInTargetName))
    for i, f := range fieldsInTargetName {
        f = f[1 : len(f)-1] // strip the brackets
        fieldsIndices = append(fieldsIndices, series.GetFieldIndex(f))
        fieldsInTargetName[i] = f
    }

    fields := make([]string, 0, len(series.Fields)-len(fieldsIndices))

    // remove the fields used in the target name from the series fields
nextfield:
    for i, f := range series.Fields {
        for _, fi := range fieldsIndices {
            if fi == i {
                continue nextfield
            }
        }
        fields = append(fields, f)
    }

    if r.MatchString(targetName) {
        serieses := map[string]*protocol.Series{}
        for _, point := range series.Points {
            fieldIndex := 0
            targetNameWithValues := r.ReplaceAllStringFunc(targetName, func(_ string) string {
                value := point.GetFieldValueAsString(fieldsIndices[fieldIndex])
                fieldIndex++
                return value
            })

            p := &protocol.Point{
                Values:         make([]*protocol.FieldValue, 0, len(point.Values)-len(fieldsIndices)),
                Timestamp:      point.Timestamp,
                SequenceNumber: point.SequenceNumber,
            }

            // remove the values of the fields used in the target name
        nextvalue:
            for i, v := range point.Values {
                for _, fi := range fieldsIndices {
                    if fi == i {
                        continue nextvalue
                    }
                }
                p.Values = append(p.Values, v)
            }

            if assignSequenceNumbers {
                key := sequenceKey{targetNameWithValues, *p.Timestamp}
                sequenceMap[key]++
                sequenceNumber := uint64(sequenceMap[key])
                p.SequenceNumber = &sequenceNumber
            }

            newSeries := serieses[targetNameWithValues]
            if newSeries == nil {
                newSeries = &protocol.Series{Name: &targetNameWithValues, Fields: fields, Points: []*protocol.Point{p}}
                serieses[targetNameWithValues] = newSeries
                continue
            }
            newSeries.Points = append(newSeries.Points, p)
        }

        seriesSlice := make([]*protocol.Series, 0, len(serieses))
        for _, s := range serieses {
            seriesSlice = append(seriesSlice, s)
        }
        if e := self.CommitSeriesData(db, seriesSlice, true); e != nil {
            log.Error("Couldn't write data for continuous query: ", e)
        }
    } else {
        newSeries := &protocol.Series{Name: &targetName, Fields: fields, Points: series.Points}
        if assignSequenceNumbers {
            for _, point := range newSeries.Points {
                key := sequenceKey{targetName, *point.Timestamp}
                sequenceMap[key]++
                sequenceNumber := uint64(sequenceMap[key])
                point.SequenceNumber = &sequenceNumber
            }
        }
        if e := self.CommitSeriesData(db, []*protocol.Series{newSeries}, true); e != nil {
            log.Error("Couldn't write data for continuous query: ", e)
        }
    }

    return nil
}
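// To make the placeholder substitution concrete, the standalone
// program below runs the same regex logic on a made-up target name
// and field value; only the regexp package is real, the data is
// hypothetical.
package main

import (
    "fmt"
    "regexp"
)

func main() {
    // a continuous-query target like "cpu.[host].load" expands per
    // point, substituting each bracketed field with that point's value
    target := "cpu.[host].load"
    r := regexp.MustCompile(`\[.*?\]`)

    // hypothetical field values for one point
    values := map[string]string{"host": "server1"}

    expanded := r.ReplaceAllStringFunc(target, func(m string) string {
        return values[m[1:len(m)-1]] // strip brackets, look up value
    })
    fmt.Println(expanded) // cpu.server1.load
}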
// We have three types of queries:
//   1. time() without fill
//   2. time() with fill
//   3. no time()
//
// For (1) we flush as soon as a new bucket starts; the prefix tree
// keeps track of the other group-by columns without the time bucket,
// and we reset the trie once the series is yielded. For (2), we keep
// track of all group-by columns with time being the last level in the
// prefix tree. At the end of the query we step through [start time,
// end time] in self.duration steps and get the state from the prefix
// tree, using default values for groups without state in the prefix
// tree. For the last case we keep the groups in the prefix tree and
// on close() we loop through the groups and flush their values with a
// timestamp equal to now().
func (self *AggregatorEngine) aggregateValuesForSeries(series *protocol.Series) (bool, error) {
    for _, aggregator := range self.aggregators {
        if err := aggregator.InitializeFieldsMetadata(series); err != nil {
            return false, err
        }
    }

    seriesState := self.getSeriesState(series.GetName())
    currentRange := seriesState.pointsRange

    includeTimestampInGroup := self.duration != nil && self.isFillQuery
    var group []*protocol.FieldValue
    if !includeTimestampInGroup {
        group = make([]*protocol.FieldValue, len(self.elems))
    } else {
        group = make([]*protocol.FieldValue, len(self.elems)+1)
    }

    for _, point := range series.Points {
        currentRange.UpdateRange(point)

        // this is a group-by with time() and no fill; flush as soon
        // as we start a new bucket
        if self.duration != nil && !self.isFillQuery {
            timestamp := self.getTimestampFromPoint(point)
            // this is the timestamp aggregator
            if seriesState.started && seriesState.lastTimestamp != timestamp {
                self.runAggregatesForTable(series.GetName())
            }
            seriesState.lastTimestamp = timestamp
            seriesState.started = true
        }

        // get the group this point belongs to
        for idx, elem := range self.elems {
            // TODO: create an index from field name to index
            // TODO: we shouldn't rely on GetValue() to do arithmetic
            // operations. Instead we should cascade the arithmetic
            // engine with the aggregator engine and possibly add
            // another arithmetic engine to be able to do arithmetic
            // on the resulting aggregated data.
            value, err := GetValue(elem, series.Fields, point)
            if err != nil {
                return false, err
            }
            group[idx] = value
        }

        // if this is a fill() query, add the timestamp at the end
        if includeTimestampInGroup {
            timestamp := self.getTimestampFromPoint(point)
            group[len(self.elems)] = &protocol.FieldValue{Int64Value: protocol.Int64(timestamp)}
        }

        // update the state of the given group
        node := seriesState.trie.GetNode(group)
        var err error
        log4go.Debug("Aggregating for group %v", group)
        for idx, aggregator := range self.aggregators {
            log4go.Debug("Aggregating value for %T for group %v and state %v", aggregator, group, node.states[idx])
            node.states[idx], err = aggregator.AggregatePoint(node.states[idx], point)
            if err != nil {
                return false, err
            }
        }
    }

    return true, nil
}
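// For intuition on case (1): a flush happens whenever a point falls
// into a new time bucket, plus one final flush on close. The toy
// program below sketches that flush-on-new-bucket pattern with plain
// int64 timestamps; the names are illustrative, not the engine's API.
package main

import "fmt"

func main() {
    // points arrive ordered by time; duration is the group-by window
    const duration = int64(10)
    timestamps := []int64{1, 3, 12, 14, 25}

    var last int64
    started := false
    for _, ts := range timestamps {
        bucket := ts / duration * duration
        // flush the previous bucket's aggregates as soon as a new
        // bucket starts, mirroring runAggregatesForTable above
        if started && last != bucket {
            fmt.Printf("flush bucket starting at %d\n", last)
        }
        last = bucket
        started = true
    }
    fmt.Printf("flush bucket starting at %d\n", last) // final flush on close
}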
// We have three types of queries:
//   1. time() without fill
//   2. time() with fill
//   3. no time()
//
// For (1) we flush as soon as a new bucket starts; the prefix tree
// keeps track of the other group-by columns without the time bucket,
// and we reset the trie once the series is yielded. For (2), we keep
// track of all group-by columns with time being the last level in the
// prefix tree. At the end of the query we step through [start time,
// end time] in self.duration steps and get the state from the prefix
// tree, using default values for groups without state in the prefix
// tree. For the last case we keep the groups in the prefix tree and
// on close() we loop through the groups and flush their values with a
// timestamp equal to now().
func (self *QueryEngine) aggregateValuesForSeries(series *protocol.Series) error {
    for _, aggregator := range self.aggregators {
        if err := aggregator.InitializeFieldsMetadata(series); err != nil {
            return err
        }
    }

    seriesState := self.getSeriesState(series.GetName())
    currentRange := seriesState.pointsRange

    includeTimestampInGroup := self.duration != nil && self.fillWithZero
    var group []*protocol.FieldValue
    if !includeTimestampInGroup {
        group = make([]*protocol.FieldValue, len(self.elems))
    } else {
        group = make([]*protocol.FieldValue, len(self.elems)+1)
    }

    for _, point := range series.Points {
        currentRange.UpdateRange(point)

        // this is a group-by with time() and no fill; flush as soon
        // as we start a new bucket
        if self.duration != nil && !self.fillWithZero {
            timestamp := self.getTimestampFromPoint(point)
            // this is the timestamp aggregator
            if seriesState.started && seriesState.lastTimestamp != timestamp {
                self.runAggregatesForTable(series.GetName())
            }
            seriesState.lastTimestamp = timestamp
            seriesState.started = true
        }

        // get the group this point belongs to
        for idx, elem := range self.elems {
            // TODO: create an index from field name to index
            value, err := GetValue(elem, series.Fields, point)
            if err != nil {
                return err
            }
            group[idx] = value
        }

        // if this is a fill() query, add the timestamp at the end
        if includeTimestampInGroup {
            timestamp := self.getTimestampFromPoint(point)
            group[len(self.elems)] = &protocol.FieldValue{Int64Value: protocol.Int64(timestamp)}
        }

        // update the state of the given group
        node := seriesState.trie.GetNode(group)
        var err error
        for idx, aggregator := range self.aggregators {
            node.states[idx], err = aggregator.AggregatePoint(node.states[idx], point)
            if err != nil {
                return err
            }
        }
    }

    return nil
}
// Yield routes the series to its shard's stream and asks the merger
// to emit whatever can be merged so far.
func (cme *CommonMergeEngine) Yield(s *protocol.Series) (bool, error) {
    log4go.Fine("CommonMergeEngine.Yield(): %s", s)
    stream := cme.streams[s.GetShardId()]
    stream.Yield(s)
    return cme.merger.Update()
}