func NewQueryEngine(query *parser.SelectQuery, responseChan chan *protocol.Response) *QueryEngine {
	limit := query.Limit
	shouldLimit := true
	if limit == 0 {
		shouldLimit = false
	}

	queryEngine := &QueryEngine{
		query:          query,
		where:          query.GetWhereCondition(),
		limit:          limit,
		limits:         make(map[string]int),
		shouldLimit:    shouldLimit,
		responseChan:   responseChan,
		seriesToPoints: make(map[string]*protocol.Series),
	}

	yield := func(series *protocol.Series) error {
		response := &protocol.Response{Type: &responseQuery, Series: series}
		responseChan <- response
		return nil
	}

	if query.HasAggregates() {
		queryEngine.executeCountQueryWithGroupBy(query, yield)
	} else if containsArithmeticOperators(query) {
		queryEngine.executeArithmeticQuery(query, yield)
	} else {
		queryEngine.distributeQuery(query, yield)
	}
	return queryEngine
}
func (self *LevelDbDatastore) ExecuteQuery(user common.User, database string,
	query *parser.SelectQuery, yield func(*protocol.Series) error,
	ringFilter func(database, series *string, time *int64) bool) error {

	seriesAndColumns := query.GetReferencedColumns()
	hasAccess := true
	for series, columns := range seriesAndColumns {
		if regex, ok := series.GetCompiledRegex(); ok {
			seriesNames := self.getSeriesForDbAndRegex(database, regex)
			for _, name := range seriesNames {
				if !user.HasReadAccess(name) {
					hasAccess = false
					continue
				}
				err := self.executeQueryForSeries(database, name, columns, query, yield, ringFilter)
				if err != nil {
					return err
				}
			}
		} else {
			if !user.HasReadAccess(series.Name) {
				hasAccess = false
				continue
			}
			err := self.executeQueryForSeries(database, series.Name, columns, query, yield, ringFilter)
			if err != nil {
				return err
			}
		}
	}
	if !hasAccess {
		return fmt.Errorf("You don't have permission to access one or more time series")
	}
	return nil
}
// Returns the number of points dropped by filtering. If the series has more
// than one alias, returns the minimum number dropped across all aliases.
func (self *LevelDbDatastore) sendBatch(query *parser.SelectQuery, series *protocol.Series, yield func(series *protocol.Series) error) (int, error) {
	dropped := int(math.MaxInt32)

	for _, alias := range query.GetTableAliases(*series.Name) {
		_alias := alias
		newSeries := &protocol.Series{Name: &_alias, Points: series.Points, Fields: series.Fields}

		lengthBeforeFiltering := len(newSeries.Points)
		var filteredResult *protocol.Series
		var err error
		if query.GetFromClause().Type == parser.FromClauseInnerJoin {
			// join queries are filtered later, after the join is performed
			filteredResult = newSeries
		} else {
			filteredResult, err = Filter(query, newSeries)
			if err != nil {
				return 0, err
			}
		}

		_dropped := lengthBeforeFiltering - len(filteredResult.Points)
		if _dropped < dropped {
			dropped = _dropped
		}

		if err := yield(filteredResult); err != nil {
			return 0, err
		}
	}

	return dropped, nil
}
func containsArithmeticOperators(query *parser.SelectQuery) bool {
	for _, column := range query.GetColumnNames() {
		if column.Type == parser.ValueExpression {
			return true
		}
	}
	return false
}
func isAggregateQuery(query *parser.SelectQuery) bool {
	for _, column := range query.GetColumnNames() {
		if column.IsFunctionCall() {
			return true
		}
	}
	return false
}
func Filter(query *parser.SelectQuery, series *protocol.Series) (*protocol.Series, error) {
	if query.GetWhereCondition() == nil {
		return series, nil
	}

	columns := map[string]struct{}{}
	if query.GetFromClause().Type == parser.FromClauseInnerJoin {
	outer:
		for t, cs := range query.GetResultColumns() {
			for _, c := range cs {
				// if this is a wildcard select, then drop all columns and
				// just use '*'
				if c == "*" {
					columns = make(map[string]struct{}, 1)
					columns[c] = struct{}{}
					break outer
				}
				columns[t.Name+"."+c] = struct{}{}
			}
		}
	} else {
		for _, cs := range query.GetResultColumns() {
			for _, c := range cs {
				columns[c] = struct{}{}
			}
		}
	}

	points := series.Points
	series.Points = nil
	for _, point := range points {
		ok, err := matches(query.GetWhereCondition(), series.Fields, point)
		if err != nil {
			return nil, err
		}

		if ok {
			filterColumns(columns, series.Fields, point)
			series.Points = append(series.Points, point)
		}
	}

	if _, ok := columns["*"]; !ok {
		newFields := []string{}
		for _, f := range series.Fields {
			if _, ok := columns[f]; !ok {
				continue
			}
			newFields = append(newFields, f)
		}
		series.Fields = newFields
	}
	return series, nil
}
func (self *QueryEngine) executeArithmeticQuery(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	names := map[string]*parser.Value{}
	for idx, v := range query.GetColumnNames() {
		switch v.Type {
		case parser.ValueSimpleName:
			names[v.Name] = v
		case parser.ValueFunctionCall:
			names[v.Name] = v
		case parser.ValueExpression:
			if v.Alias != "" {
				names[v.Alias] = v
			} else {
				names["expr"+strconv.Itoa(idx)] = v
			}
		}
	}

	return self.distributeQuery(query, func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return yield(series)
		}

		newSeries := &protocol.Series{
			Name: series.Name,
		}

		// create the new column names. note that map iteration order is not
		// deterministic, so the column order is arbitrary, but it is
		// consistent for all points within this series.
		for name := range names {
			newSeries.Fields = append(newSeries.Fields, name)
		}

		for _, point := range series.Points {
			newPoint := &protocol.Point{
				Timestamp:      point.Timestamp,
				SequenceNumber: point.SequenceNumber,
			}
			for _, field := range newSeries.Fields {
				value := names[field]
				v, err := GetValue(value, series.Fields, point)
				if err != nil {
					log.Error("Error in arithmetic computation: %s", err)
					return err
				}
				newPoint.Values = append(newPoint.Values, v)
			}
			newSeries.Points = append(newSeries.Points, newPoint)
		}

		return yield(newSeries)
	})
}
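// Illustrative, standalone sketch (not part of the original code) of what
// executeArithmeticQuery above does per point: each selected expression is
// evaluated against the point's field values, and the results become the
// columns of the rewritten point. The "value * 2" expression and the eval
// closure standing in for GetValue are hypothetical.
package main

import "fmt"

func main() {
	points := []float64{1, 2, 3} // the "value" column of incoming points

	// hypothetical stand-in for GetValue evaluating "value * 2"
	eval := func(value float64) float64 { return value * 2 }

	for _, v := range points {
		fmt.Println("expr0 =", eval(v)) // expr0 = 2, 4, 6
	}
}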
func getJoinYield(query *parser.SelectQuery, yield func(*protocol.Series) error) func(*protocol.Series) error {
	var lastPoint1 *protocol.Point
	var lastFields1 []string
	var lastPoint2 *protocol.Point
	var lastFields2 []string

	table1 := query.GetFromClause().Names[0].GetAlias()
	table2 := query.GetFromClause().Names[1].GetAlias()
	name := table1 + "_join_" + table2

	return mergeYield(table1, table2, false, query.Ascending, func(s *protocol.Series) error {
		if *s.Name == table1 {
			lastPoint1 = s.Points[len(s.Points)-1]
			if lastFields1 == nil {
				for _, f := range s.Fields {
					lastFields1 = append(lastFields1, table1+"."+f)
				}
			}
		}

		if *s.Name == table2 {
			lastPoint2 = s.Points[len(s.Points)-1]
			if lastFields2 == nil {
				for _, f := range s.Fields {
					lastFields2 = append(lastFields2, table2+"."+f)
				}
			}
		}

		if lastPoint1 == nil || lastPoint2 == nil {
			return nil
		}

		newSeries := &protocol.Series{
			Name:   &name,
			Fields: append(lastFields1, lastFields2...),
			Points: []*protocol.Point{
				{
					Values:    append(lastPoint1.Values, lastPoint2.Values...),
					Timestamp: lastPoint2.Timestamp,
				},
			},
		}

		lastPoint1 = nil
		lastPoint2 = nil

		filteredSeries, err := Filter(query, newSeries)
		if err != nil {
			return err
		}
		if len(filteredSeries.Points) > 0 {
			return yield(filteredSeries)
		}
		return nil
	})
}
// Distributes the query across the cluster and combines the results. Yields
// them as they come in, ensuring proper order.
// TODO: make this work even if there is a downed server in the cluster
func (self *CoordinatorImpl) DistributeQuery(user common.User, db string, query *parser.SelectQuery, localOnly bool, yield func(*protocol.Series) error) error {
	if self.clusterConfiguration.IsSingleServer() || localOnly {
		return self.datastore.ExecuteQuery(user, db, query, yield, nil)
	}

	servers, replicationFactor := self.clusterConfiguration.GetServersToMakeQueryTo(&db)
	id := atomic.AddUint32(&self.requestId, uint32(1))
	userName := user.GetName()
	isDbUser := !user.IsClusterAdmin()
	responseChannels := make([]chan *protocol.Response, 0, len(servers)+1)
	queryString := query.GetQueryString()
	var localServerToQuery *serverToQuery
	for _, server := range servers {
		if server.server.Id == self.clusterConfiguration.localServerId {
			localServerToQuery = server
		} else {
			request := &protocol.Request{Type: &queryRequest, Query: &queryString, Id: &id, Database: &db, UserName: &userName, IsDbUser: &isDbUser}
			if server.ringLocationsToQuery != replicationFactor {
				r := server.ringLocationsToQuery
				request.RingLocationsToQuery = &r
			}
			responseChan := make(chan *protocol.Response, 3)
			server.server.MakeRequest(request, responseChan)
			responseChannels = append(responseChannels, responseChan)
		}
	}

	local := make(chan *protocol.Response)
	nextPointMap := make(map[string]*NextPoint)

	// TODO: this style of wrapping the series in response objects with the
	// last point time is duplicated in the request handler. Refactor...
	sendFromLocal := func(series *protocol.Series) error {
		response := createResponse(nextPointMap, series, nil)
		local <- response
		return nil
	}
	responseChannels = append(responseChannels, local)

	// TODO: wire up the WillReturnSingleSeries method, uncomment the next
	// line, and delete the one after it.
	// isSingleSeriesQuery := query.WillReturnSingleSeries()
	isSingleSeriesQuery := false

	go func() {
		var ringFilter func(database, series *string, time *int64) bool
		if replicationFactor != localServerToQuery.ringLocationsToQuery {
			ringFilter = self.clusterConfiguration.GetRingFilterFunction(db, localServerToQuery.ringLocationsToQuery)
		}
		self.datastore.ExecuteQuery(user, db, query, sendFromLocal, ringFilter)
		local <- &protocol.Response{Type: &endStreamResponse}
		close(local)
	}()

	self.streamResultsFromChannels(isSingleSeriesQuery, query.Ascending, responseChannels, yield)
	return nil
}
// distribute query and possibly do the merge/join before yielding the points
func (self *QueryEngine) distributeQuery(user common.User, database string, query *parser.SelectQuery, yield func(*protocol.Series) error) (err error) {
	// see if this is a merge query
	fromClause := query.GetFromClause()
	if fromClause.Type == parser.FromClauseMerge {
		yield = getMergeYield(fromClause.Names[0].Name.Name, fromClause.Names[1].Name.Name, query.Ascending, yield)
	}

	if fromClause.Type == parser.FromClauseInnerJoin {
		yield = getJoinYield(query, yield)
	}

	return self.coordinator.DistributeQuery(user, database, query, yield)
}
// distribute query and possibly do the merge/join before yielding the points
func (self *QueryEngine) distributeQuery(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	// see if this is a merge query
	fromClause := query.GetFromClause()
	if fromClause.Type == parser.FromClauseMerge {
		yield = getMergeYield(fromClause.Names[0].Name.Name, fromClause.Names[1].Name.Name, query.Ascending, yield)
	}

	if fromClause.Type == parser.FromClauseInnerJoin {
		yield = getJoinYield(query, yield)
	}

	self.yield = yield
	return nil
}
func (s *RaftServer) runContinuousQuery(db string, query *parser.SelectQuery, start time.Time, end time.Time) {
	adminName := s.clusterConfig.GetClusterAdmins()[0]
	clusterAdmin := s.clusterConfig.GetClusterAdmin(adminName)
	intoClause := query.GetIntoClause()
	targetName := intoClause.Target.Name
	queryString := query.GetQueryStringForContinuousQuery(start, end)

	f := func(series *protocol.Series) error {
		return s.coordinator.InterpolateValuesAndCommit(db, series, targetName, true)
	}

	writer := NewContinuousQueryWriter(f)
	s.coordinator.RunQuery(clusterAdmin, db, queryString, writer)
}
func (self *QueryEngine) executeCountQueryWithGroupBy(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	self.aggregateYield = yield
	duration, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return err
	}

	self.isAggregateQuery = true
	self.duration = duration
	self.aggregators = []Aggregator{}

	for _, value := range query.GetColumnNames() {
		if !value.IsFunctionCall() {
			continue
		}

		lowerCaseName := strings.ToLower(value.Name)
		initializer := registeredAggregators[lowerCaseName]
		if initializer == nil {
			return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("Unknown function %s", value.Name))
		}
		aggregator, err := initializer(query, value, query.GetGroupByClause().FillValue)
		if err != nil {
			return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("%s", err))
		}
		self.aggregators = append(self.aggregators, aggregator)
	}

	for _, elem := range query.GetGroupByClause().Elems {
		if elem.IsFunctionCall() {
			continue
		}
		self.elems = append(self.elems, elem)
	}

	self.fillWithZero = query.GetGroupByClause().FillWithZero
	self.initializeFields()

	err = self.distributeQuery(query, func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return nil
		}
		return self.aggregateValuesForSeries(series)
	})

	return err
}
func (s *RaftServer) runContinuousQuery(db string, query *parser.SelectQuery, start time.Time, end time.Time) {
	clusterAdmin := s.clusterConfig.clusterAdmins["root"]
	intoClause := query.GetIntoClause()
	targetName := intoClause.Target.Name
	sequenceNumber := uint64(1)
	queryString := query.GetQueryStringForContinuousQuery(start, end)

	s.engine.RunQuery(clusterAdmin, db, queryString, false, func(series *protocol.Series) error {
		interpolatedTargetName := strings.Replace(targetName, ":series_name", *series.Name, -1)
		series.Name = &interpolatedTargetName
		for _, point := range series.Points {
			point.SequenceNumber = &sequenceNumber
		}
		return s.coordinator.WriteSeriesData(clusterAdmin, db, series)
	})
}
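// Illustrative, standalone sketch (not part of the original code) of the
// ":series_name" interpolation performed by runContinuousQuery above: a
// continuous query writing into a target containing the placeholder fans out
// to one destination series per source series. The "10m.:series_name" target
// and "cpu_idle" series are hypothetical examples.
package main

import (
	"fmt"
	"strings"
)

func main() {
	targetName := "10m.:series_name"
	seriesName := "cpu_idle"
	interpolated := strings.Replace(targetName, ":series_name", seriesName, -1)
	fmt.Println(interpolated) // 10m.cpu_idle
}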
func NewQueryEngine(query *parser.SelectQuery, responseChan chan *protocol.Response) (*QueryEngine, error) {
	limit := query.Limit
	// disable the limit if the query has aggregates; let the coordinator
	// deal with the limit
	if query.HasAggregates() {
		limit = 0
	}

	queryEngine := &QueryEngine{
		query:          query,
		where:          query.GetWhereCondition(),
		limiter:        NewLimiter(limit),
		responseChan:   responseChan,
		seriesToPoints: make(map[string]*protocol.Series),
		// stats stuff
		explain:       query.IsExplainQuery(),
		runStartTime:  0,
		runEndTime:    0,
		pointsRead:    0,
		pointsWritten: 0,
		shardId:       0,
		shardLocal:    false, // that really doesn't matter if it is not an EXPLAIN query
	}

	if queryEngine.explain {
		queryEngine.runStartTime = float64(time.Now().UnixNano()) / float64(time.Millisecond)
	}

	yield := func(series *protocol.Series) error {
		var response *protocol.Response

		if queryEngine.explain {
			// TODO: We may not have to send points, just count them
			queryEngine.pointsWritten += int64(len(series.Points))
		}
		response = &protocol.Response{Type: &queryResponse, Series: series}
		responseChan <- response
		return nil
	}

	var err error
	if query.HasAggregates() {
		err = queryEngine.executeCountQueryWithGroupBy(query, yield)
	} else if containsArithmeticOperators(query) {
		err = queryEngine.executeArithmeticQuery(query, yield)
	} else {
		err = queryEngine.distributeQuery(query, yield)
	}

	if err != nil {
		return nil, err
	}
	return queryEngine, nil
}
func NewTimestampAggregator(query *parser.SelectQuery, _ *parser.Value) (Aggregator, error) {
	duration, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return nil, err
	}

	var durationPtr *int64
	if duration != nil {
		newDuration := int64(*duration / time.Microsecond)
		durationPtr = &newDuration
	}

	return &TimestampAggregator{
		timestamps: make(map[string]map[interface{}]int64),
		duration:   durationPtr,
	}, nil
}
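// Illustrative, standalone sketch (not part of the original code) of the unit
// conversion performed by NewTimestampAggregator above: GetGroupByTime
// returns a *time.Duration (nanosecond resolution), while point timestamps
// are stored in microseconds, so the group interval is converted to
// microseconds before use.
package main

import (
	"fmt"
	"time"
)

func main() {
	duration := time.Hour // e.g. from "group by time(1h)"
	micros := int64(duration / time.Microsecond)
	fmt.Println(micros) // 3600000000
}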
func (self *LevelDbDatastore) fetchSinglePoint(database, series string, fields []*Field, query *parser.SelectQuery) (*protocol.Series, error) {
	fieldCount := len(fields)
	fieldNames := make([]string, 0, fieldCount)
	point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
	timestampBuffer := bytes.NewBuffer(make([]byte, 0, 8))
	sequenceNumberBuffer := bytes.NewBuffer(make([]byte, 0, 8))

	timestamp := common.TimeToMicroseconds(query.GetStartTime())
	sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
	if err != nil {
		return nil, err
	}

	binary.Write(timestampBuffer, binary.BigEndian, self.convertTimestampToUint(&timestamp))
	binary.Write(sequenceNumberBuffer, binary.BigEndian, sequenceNumber)
	sequenceNumber_uint64 := uint64(sequenceNumber)
	point.SequenceNumber = &sequenceNumber_uint64
	point.SetTimestampInMicroseconds(timestamp)

	for _, field := range fields {
		pointKey := append(append(field.Id, timestampBuffer.Bytes()...), sequenceNumberBuffer.Bytes()...)
		if data, err := self.db.Get(self.readOptions, pointKey); err != nil {
			return nil, err
		} else {
			fieldValue := &protocol.FieldValue{}
			err := proto.Unmarshal(data, fieldValue)
			if err != nil {
				return nil, err
			}
			if data != nil {
				fieldNames = append(fieldNames, field.Name)
				point.Values = append(point.Values, fieldValue)
			}
		}
	}

	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: []*protocol.Point{point}}
	return result, nil
}
func Filter(query *parser.SelectQuery, series *protocol.Series) (*protocol.Series, error) {
	if query.GetWhereCondition() == nil {
		return series, nil
	}

	columns := map[string]bool{}
	getColumns(query.GetColumnNames(), columns)
	getColumns(query.GetGroupByClause().Elems, columns)

	points := series.Points
	series.Points = nil
	for _, point := range points {
		ok, err := matches(query.GetWhereCondition(), series.Fields, point)
		if err != nil {
			return nil, err
		}

		if ok {
			filterColumns(columns, series.Fields, point)
			series.Points = append(series.Points, point)
		}
	}

	if !columns["*"] {
		newFields := []string{}
		for _, f := range series.Fields {
			if _, ok := columns[f]; !ok {
				continue
			}
			newFields = append(newFields, f)
		}
		series.Fields = newFields
	}
	return series, nil
}
// Distributes the query across the cluster and combines the results. Yields
// them as they come in, ensuring proper order.
// TODO: make this work even if there is a downed server in the cluster
func (self *CoordinatorImpl) DistributeQuery(user common.User, db string, query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	if self.clusterConfiguration.IsSingleServer() {
		return self.datastore.ExecuteQuery(user, db, query, yield, nil)
	}

	servers, replicationFactor := self.clusterConfiguration.GetServersToMakeQueryTo(self.localHostId, &db)
	queryString := query.GetQueryString()
	id := atomic.AddUint32(&self.requestId, uint32(1))
	userName := user.GetName()
	responseChannels := make([]chan *protocol.Response, 0, len(servers)+1)
	var localServerToQuery *serverToQuery
	for _, server := range servers {
		if server.server.Id == self.localHostId {
			localServerToQuery = server
		} else {
			request := &protocol.Request{Type: &queryRequest, Query: &queryString, Id: &id, Database: &db, UserName: &userName}
			if server.ringLocationsToQuery != replicationFactor {
				r := server.ringLocationsToQuery
				request.RingLocationsToQuery = &r
			}
			responseChan := make(chan *protocol.Response, 3)
			server.server.protobufClient.MakeRequest(request, responseChan)
			responseChannels = append(responseChannels, responseChan)
		}
	}

	local := make(chan *protocol.Response)
	nextPointMap := make(map[string]*protocol.Point)

	// TODO: this style of wrapping the series in response objects with the
	// last point time is duplicated in the request handler. Refactor...
	sendFromLocal := func(series *protocol.Series) error {
		pointCount := len(series.Points)
		if pointCount == 0 {
			// flush any point held back for this series
			if nextPoint := nextPointMap[*series.Name]; nextPoint != nil {
				series.Points = append(series.Points, nextPoint)
			}

			local <- &protocol.Response{Type: &queryResponse, Series: series}
			return nil
		}

		// hold back the newest point of this batch and prepend the point
		// held back from the previous batch, so the merger always knows the
		// timestamp of the next point to come
		oldNextPoint := nextPointMap[*series.Name]
		nextPoint := series.Points[pointCount-1]
		series.Points[pointCount-1] = nil
		if oldNextPoint != nil {
			copy(series.Points[1:], series.Points[0:])
			series.Points[0] = oldNextPoint
		} else {
			series.Points = series.Points[:len(series.Points)-1]
		}

		response := &protocol.Response{Series: series, Type: &queryResponse}
		if nextPoint != nil {
			nextPointMap[*series.Name] = nextPoint
			response.NextPointTime = nextPoint.Timestamp
		}
		local <- response
		return nil
	}
	responseChannels = append(responseChannels, local)

	// TODO: wire up the WillReturnSingleSeries method, uncomment the next
	// line, and delete the one after it.
	// isSingleSeriesQuery := query.WillReturnSingleSeries()
	isSingleSeriesQuery := false

	go func() {
		var ringFilter func(database, series *string, time *int64) bool
		if replicationFactor != localServerToQuery.ringLocationsToQuery {
			ringFilter = self.clusterConfiguration.GetRingFilterFunction(db, localServerToQuery.ringLocationsToQuery)
		}
		self.datastore.ExecuteQuery(user, db, query, sendFromLocal, ringFilter)
		local <- &protocol.Response{Type: &endStreamResponse}
		close(local)
	}()

	self.streamResultsFromChannels(isSingleSeriesQuery, query.Ascending, responseChannels, yield)
	return nil
}
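// Illustrative, standalone sketch (not part of the original code) of the
// hold-back-one-point pattern used by sendFromLocal above, with ints standing
// in for points: the newest point of each batch is withheld and prepended to
// the following batch, so the receiver always knows the timestamp of the next
// point and can merge multiple ordered streams correctly.
package main

import "fmt"

func main() {
	var held *int
	send := func(batch []int) {
		if len(batch) == 0 {
			return
		}
		next := batch[len(batch)-1]
		batch = batch[:len(batch)-1]
		if held != nil {
			batch = append([]int{*held}, batch...)
		}
		held = &next
		fmt.Printf("emit %v, next point: %d\n", batch, next)
	}
	send([]int{1, 2, 3}) // emit [1 2], next point: 3
	send([]int{4, 5, 6}) // emit [3 4 5], next point: 6
}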
func NewPointFilter(query *parser.SelectQuery, queryColumnNames []string) *PointFilter {
	columns := map[string]bool{}
	getColumns(query.GetColumnNames(), columns)
	getColumns(query.GetGroupByClause().Elems, columns)
	return &PointFilter{columns: columns, queryColumnNames: queryColumnNames, where: query.GetWhereCondition()}
}
func (self *LevelDbDatastore) executeQueryForSeries(database, series string, columns []string,
	query *parser.SelectQuery, yield func(*protocol.Series) error,
	ringFilter func(database, series *string, time *int64) bool) error {

	startTimeBytes, endTimeBytes := self.byteArraysForStartAndEndTimes(common.TimeToMicroseconds(query.GetStartTime()), common.TimeToMicroseconds(query.GetEndTime()))
	emptyResult := &protocol.Series{Name: &series, Points: nil}

	fields, err := self.getFieldsForSeries(database, series, columns)
	if err != nil {
		// because a db is distributed across the cluster, it's possible we
		// don't have the series indexed here. ignore
		switch err := err.(type) {
		case FieldLookupError:
			return yield(emptyResult)
		default:
			return err
		}
	}
	fieldCount := len(fields)
	rawColumnValues := make([]*rawColumnValue, fieldCount)

	if query.IsSinglePointQuery() {
		result, err := self.fetchSinglePoint(database, series, fields, query)
		if err != nil {
			return err
		}
		if err := yield(result); err != nil {
			return err
		}
		return nil
	}

	fieldNames, iterators := self.getIterators(fields, startTimeBytes, endTimeBytes, query.Ascending)
	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}

	limit := query.Limit
	shouldLimit := true
	if limit == 0 {
		limit = -1
		shouldLimit = false
	}

	resultByteCount := 0

	// TODO: clean up, this is super gnarly
	// optimize for the case where we're pulling back only a single column or aggregate
	for {
		isValid := false
		point := &protocol.Point{Values: make([]*protocol.FieldValue, fieldCount)}

		for i, it := range iterators {
			if rawColumnValues[i] != nil || !it.Valid() {
				continue
			}

			key := it.Key()
			if len(key) < 16 {
				continue
			}

			if !isPointInRange(fields[i].Id, startTimeBytes, endTimeBytes, key) {
				continue
			}

			value := it.Value()
			sequenceNumber := key[16:]
			rawTime := key[8:16]
			rawColumnValues[i] = &rawColumnValue{time: rawTime, sequence: sequenceNumber, value: value}
		}

		var pointTimeRaw []byte
		var pointSequenceRaw []byte
		// choose the highest (or lowest in case of ascending queries) timestamp
		// and sequence number. that will become the timestamp and sequence of
		// the next point.
		for _, value := range rawColumnValues {
			if value == nil {
				continue
			}

			pointTimeRaw, pointSequenceRaw = value.updatePointTimeAndSequence(pointTimeRaw, pointSequenceRaw, query.Ascending)
		}

		for i, iterator := range iterators {
			// if the value is nil, or doesn't match the point's timestamp and
			// sequence number, then skip it
			if rawColumnValues[i] == nil ||
				!bytes.Equal(rawColumnValues[i].time, pointTimeRaw) ||
				!bytes.Equal(rawColumnValues[i].sequence, pointSequenceRaw) {
				point.Values[i] = &protocol.FieldValue{IsNull: &TRUE}
				continue
			}

			// if we emitted at least one column, then we should keep
			// trying to get more points
			isValid = true

			// advance the iterator to read a new value in the next iteration
			if query.Ascending {
				iterator.Next()
			} else {
				iterator.Prev()
			}

			fv := &protocol.FieldValue{}
			resultByteCount += len(rawColumnValues[i].value)
			err := proto.Unmarshal(rawColumnValues[i].value, fv)
			if err != nil {
				return err
			}
			point.Values[i] = fv
			rawColumnValues[i] = nil
		}

		// set the point sequence number and timestamp
		var sequence uint64
		binary.Read(bytes.NewBuffer(pointSequenceRaw), binary.BigEndian, &sequence)
		var t uint64
		binary.Read(bytes.NewBuffer(pointTimeRaw), binary.BigEndian, &t)
		time := self.convertUintTimestampToInt64(&t)
		point.SetTimestampInMicroseconds(time)
		point.SequenceNumber = &sequence

		// stop the loop if we ran out of points
		if !isValid {
			break
		}

		limit -= 1

		if ringFilter != nil && ringFilter(&database, &series, point.Timestamp) {
			continue
		}

		result.Points = append(result.Points, point)

		// add byte count for the timestamp and the sequence number
		resultByteCount += 16

		// check if we should send the batch along
		if resultByteCount > MAX_SERIES_SIZE || (shouldLimit && limit == 0) {
			dropped, err := self.sendBatch(query, result, yield)
			if err != nil {
				return err
			}
			limit += dropped
			resultByteCount = 0
			result = &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}
		}
		if shouldLimit && limit < 1 {
			break
		}
	}

	if _, err := self.sendBatch(query, result, yield); err != nil {
		return err
	}
	_, err = self.sendBatch(query, emptyResult, yield)
	return err
}
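// Illustrative, standalone sketch (not part of the original code) of the
// storage key layout implied by fetchSinglePoint and executeQueryForSeries
// above: a field id, followed by a big-endian 8-byte timestamp and an 8-byte
// sequence number. The 8-byte field id is an assumption inferred from the
// key[8:16] / key[16:] slicing; big-endian encoding is what makes
// lexicographic key order match time order during iteration.
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func makePointKey(fieldId []byte, timestamp, sequence uint64) []byte {
	buf := bytes.NewBuffer(make([]byte, 0, len(fieldId)+16))
	buf.Write(fieldId)
	binary.Write(buf, binary.BigEndian, timestamp)
	binary.Write(buf, binary.BigEndian, sequence)
	return buf.Bytes()
}

func main() {
	key := makePointKey([]byte{0, 0, 0, 0, 0, 0, 0, 1}, 1000000, 1)
	fmt.Printf("time: %x sequence: %x\n", key[8:16], key[16:])
}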
func (self *QueryEngine) executeCountQueryWithGroupBy(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	self.aggregateYield = yield
	duration, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return err
	}

	self.isAggregateQuery = true
	self.duration = duration
	self.aggregators = []Aggregator{}

	for _, value := range query.GetColumnNames() {
		if value.IsFunctionCall() {
			lowerCaseName := strings.ToLower(value.Name)
			initializer := registeredAggregators[lowerCaseName]
			if initializer == nil {
				return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("Unknown function %s", value.Name))
			}
			aggregator, err := initializer(query, value, query.GetGroupByClause().FillValue)
			if err != nil {
				return err
			}
			self.aggregators = append(self.aggregators, aggregator)
		}
	}
	timestampAggregator, err := NewTimestampAggregator(query, nil)
	if err != nil {
		return err
	}
	self.timestampAggregator = timestampAggregator
	self.groups = make(map[string]map[Group]bool)
	self.pointsRange = make(map[string]*PointRange)
	self.groupBy = query.GetGroupByClause()

	err = self.distributeQuery(query, func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return nil
		}

		var mapper Mapper
		mapper, err = createValuesToInterface(self.groupBy, series.Fields)
		if err != nil {
			return err
		}

		for _, aggregator := range self.aggregators {
			if err := aggregator.InitializeFieldsMetadata(series); err != nil {
				return err
			}
		}

		currentRange := self.pointsRange[*series.Name]
		for _, point := range series.Points {
			value := mapper(point)
			for _, aggregator := range self.aggregators {
				err := aggregator.AggregatePoint(*series.Name, value, point)
				if err != nil {
					return err
				}
			}

			self.timestampAggregator.AggregatePoint(*series.Name, value, point)
			seriesGroups := self.groups[*series.Name]
			if seriesGroups == nil {
				seriesGroups = make(map[Group]bool)
				self.groups[*series.Name] = seriesGroups
			}
			seriesGroups[value] = true

			if currentRange == nil {
				currentRange = &PointRange{*point.Timestamp, *point.Timestamp}
				self.pointsRange[*series.Name] = currentRange
			} else {
				currentRange.UpdateRange(point)
			}
		}

		return nil
	})
	return err
}
func NewFilteringEngine(query *parser.SelectQuery, processor QueryProcessor) *FilteringEngine {
	shouldFilter := query.GetWhereCondition() != nil
	return &FilteringEngine{query, processor, shouldFilter}
}
func (self *QueryEngine) executeCountQueryWithGroupBy(user common.User, database string, query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	duration, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return err
	}

	aggregators := []Aggregator{}
	for _, value := range query.GetColumnNames() {
		if value.IsFunctionCall() {
			lowerCaseName := strings.ToLower(value.Name)
			initializer := registeredAggregators[lowerCaseName]
			if initializer == nil {
				return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("Unknown function %s", value.Name))
			}
			aggregator, err := initializer(query, value, query.GetGroupByClause().FillValue)
			if err != nil {
				return err
			}
			aggregators = append(aggregators, aggregator)
		}
	}
	timestampAggregator, err := NewTimestampAggregator(query, nil)
	if err != nil {
		return err
	}
	groups := make(map[string]map[Group]bool)
	pointsRange := make(map[string]*PointRange)
	groupBy := query.GetGroupByClause()

	err = self.distributeQuery(user, database, query, func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return nil
		}

		var mapper Mapper
		mapper, err = createValuesToInterface(groupBy, series.Fields)
		if err != nil {
			return err
		}

		for _, aggregator := range aggregators {
			if err := aggregator.InitializeFieldsMetadata(series); err != nil {
				return err
			}
		}

		currentRange := pointsRange[*series.Name]
		for _, point := range series.Points {
			value := mapper(point)
			for _, aggregator := range aggregators {
				err := aggregator.AggregatePoint(*series.Name, value, point)
				if err != nil {
					return err
				}
			}

			timestampAggregator.AggregatePoint(*series.Name, value, point)
			seriesGroups := groups[*series.Name]
			if seriesGroups == nil {
				seriesGroups = make(map[Group]bool)
				groups[*series.Name] = seriesGroups
			}
			seriesGroups[value] = true

			if currentRange == nil {
				currentRange = &PointRange{*point.Timestamp, *point.Timestamp}
				pointsRange[*series.Name] = currentRange
			} else {
				currentRange.UpdateRange(point)
			}
		}

		return nil
	})
	if err != nil {
		return err
	}

	fields := []string{}
	for _, aggregator := range aggregators {
		columnNames := aggregator.ColumnNames()
		fields = append(fields, columnNames...)
	}

	for _, value := range groupBy.Elems {
		if value.IsFunctionCall() {
			continue
		}

		tempName := value.Name
		fields = append(fields, tempName)
	}

	for table, tableGroups := range groups {
		tempTable := table
		points := []*protocol.Point{}

		var _groups []Group

		if !query.GetGroupByClause().FillWithZero || duration == nil {
			// sort the table groups by timestamp
			_groups = make([]Group, 0, len(tableGroups))
			for groupId := range tableGroups {
				_groups = append(_groups, groupId)
			}
		} else {
			groupsWithTime := map[Group]bool{}
			timeRange, ok := pointsRange[table]
			if ok {
				first := timeRange.startTime * 1000 / int64(*duration) * int64(*duration)
				end := timeRange.endTime * 1000 / int64(*duration) * int64(*duration)

				for i := 0; ; i++ {
					timestamp := first + int64(i)*int64(*duration)
					if end < timestamp {
						break
					}
					for group := range tableGroups {
						groupWithTime := group.WithoutTimestamp().WithTimestamp(timestamp / 1000)
						groupsWithTime[groupWithTime] = true
					}
				}

				for groupId := range groupsWithTime {
					_groups = append(_groups, groupId)
				}
			}
		}

		fillWithZero := duration != nil && query.GetGroupByClause().FillWithZero
		var sortedGroups SortableGroups
		if fillWithZero {
			if query.Ascending {
				sortedGroups = &AscendingGroupTimestampSortableGroups{CommonSortableGroups{_groups, table}}
			} else {
				sortedGroups = &DescendingGroupTimestampSortableGroups{CommonSortableGroups{_groups, table}}
			}
		} else {
			if query.Ascending {
				sortedGroups = &AscendingAggregatorSortableGroups{CommonSortableGroups{_groups, table}, timestampAggregator}
			} else {
				sortedGroups = &DescendingAggregatorSortableGroups{CommonSortableGroups{_groups, table}, timestampAggregator}
			}
		}
		sort.Sort(sortedGroups)

		for _, groupId := range sortedGroups.GetSortedGroups() {
			var timestamp int64
			if groupId.HasTimestamp() {
				timestamp = groupId.GetTimestamp()
			} else {
				timestamp = *timestampAggregator.GetValues(table, groupId)[0][0].Int64Value
			}

			values := [][][]*protocol.FieldValue{}
			for _, aggregator := range aggregators {
				values = append(values, aggregator.GetValues(table, groupId))
			}

			// do cross product of all the values
			_values := crossProduct(values)

			for _, v := range _values {
				point := &protocol.Point{
					Values: v,
				}
				point.SetTimestampInMicroseconds(timestamp)

				// FIXME: this should be looking at the fields slice, not the group by clause
				// FIXME: we should check whether the selected columns are in the group by clause
				for idx := range groupBy.Elems {
					if duration != nil && idx == 0 {
						continue
					}

					value := groupId.GetValue(idx)

					switch x := value.(type) {
					case string:
						point.Values = append(point.Values, &protocol.FieldValue{StringValue: &x})
					case bool:
						point.Values = append(point.Values, &protocol.FieldValue{BoolValue: &x})
					case float64:
						point.Values = append(point.Values, &protocol.FieldValue{DoubleValue: &x})
					case int64:
						point.Values = append(point.Values, &protocol.FieldValue{Int64Value: &x})
					case nil:
						point.Values = append(point.Values, nil)
					}
				}

				points = append(points, point)
			}
		}

		expectedData := &protocol.Series{
			Name:   &tempTable,
			Fields: fields,
			Points: points,
		}
		if err := yield(expectedData); err != nil {
			return err
		}
	}

	return nil
}
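// Illustrative, standalone sketch (not part of the original code) of the
// fill-with-zero bucket arithmetic above: start/end times are stored in
// microseconds while the group-by duration is a time.Duration in nanoseconds,
// hence the *1000 before flooring to a bucket boundary and the /1000 when
// stamping the synthetic groups.
package main

import (
	"fmt"
	"time"
)

func main() {
	startTime := int64(1500000) // first point's timestamp, microseconds
	duration := int64(time.Second)
	first := startTime * 1000 / duration * duration // floor to bucket boundary, nanoseconds
	fmt.Println(first / 1000)                       // 1000000: bucket start in microseconds
}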
func (self *QueryEngine) executeCountQueryWithGroupBy(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	self.aggregateYield = yield
	duration, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return err
	}

	self.isAggregateQuery = true
	self.duration = duration
	self.aggregators = []Aggregator{}

	for _, value := range query.GetColumnNames() {
		if !value.IsFunctionCall() {
			continue
		}

		lowerCaseName := strings.ToLower(value.Name)
		initializer := registeredAggregators[lowerCaseName]
		if initializer == nil {
			return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("Unknown function %s", value.Name))
		}
		aggregator, err := initializer(query, value, query.GetGroupByClause().FillValue)
		if err != nil {
			return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("%s", err))
		}
		self.aggregators = append(self.aggregators, aggregator)
	}

	timestampAggregator, err := NewTimestampAggregator(query, nil)
	if err != nil {
		return err
	}
	self.timestampAggregator = timestampAggregator
	self.groups = make(map[string]map[Group]bool)
	self.pointsRange = make(map[string]*PointRange)
	self.groupBy = query.GetGroupByClause()

	self.initializeFields()

	err = self.distributeQuery(query, func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return nil
		}

		// if we're not doing group by time() then keep all the state in
		// memory until the query finishes reading all data points
		if self.duration == nil || query.GetGroupByClause().FillWithZero {
			return self.aggregateValuesForSeries(series)
		}

		// otherwise, keep the state for the current bucket. Once ticks
		// come in for a different time bucket, we flush the state that's
		// kept in memory by the aggregators

		// split the time series by time buckets
		bucketedSeries := []*protocol.Series{}
		currentSeries := &protocol.Series{
			Name:   series.Name,
			Fields: series.Fields,
			Points: []*protocol.Point{series.Points[0]},
		}
		currentBucket := self.getTimestampFromPoint(series.Points[0])
		for _, p := range series.Points[1:] {
			bucket := self.getTimestampFromPoint(p)
			if bucket != currentBucket {
				bucketedSeries = append(bucketedSeries, currentSeries)
				currentSeries = &protocol.Series{Name: series.Name, Fields: series.Fields}
				currentBucket = bucket
			}
			currentSeries.Points = append(currentSeries.Points, p)
		}
		bucketedSeries = append(bucketedSeries, currentSeries)

		for _, s := range bucketedSeries[:len(bucketedSeries)-1] {
			if err := self.aggregateValuesForSeries(s); err != nil {
				return err
			}
			self.calculateSummariesForTable(*s.Name)
		}

		last := bucketedSeries[len(bucketedSeries)-1]
		bucket := self.getTimestampFromPoint(last.Points[0])
		if b, ok := self.buckets[*series.Name]; ok && b != bucket {
			self.calculateSummariesForTable(*last.Name)
		}
		self.buckets[*series.Name] = bucket

		return self.aggregateValuesForSeries(last)
	})
	return err
}
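// Illustrative, standalone sketch (not part of the original code) of the
// bucket-splitting loop above, with raw microsecond timestamps standing in
// for points: consecutive points are grouped into runs that share a time
// bucket, and each completed run is flushed before the next begins. The 1s
// bucket width is a hypothetical example.
package main

import "fmt"

func main() {
	const bucketMicros = 1000000 // hypothetical "group by time(1s)"
	timestamps := []int64{100, 900000, 1100000, 2500000}

	currentBucket := timestamps[0] / bucketMicros
	run := []int64{timestamps[0]}
	for _, t := range timestamps[1:] {
		if b := t / bucketMicros; b != currentBucket {
			fmt.Println("flush bucket", currentBucket, run)
			currentBucket, run = b, nil
		}
		run = append(run, t)
	}
	fmt.Println("last bucket", currentBucket, run)
}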