Beispiel #1
0
// NewQueryEngine builds a QueryEngine for query and immediately dispatches
// the query to the matching execution path: aggregate, arithmetic or plain.
// Every series handed back by that path is wrapped in a protocol.Response and
// sent on responseChan. A query limit of 0 disables limiting.
//
// The values returned by the execute helpers are discarded, since this
// constructor has no error result.
func NewQueryEngine(query *parser.SelectQuery, responseChan chan *protocol.Response) *QueryEngine {
	engine := &QueryEngine{
		query:          query,
		where:          query.GetWhereCondition(),
		limit:          query.Limit,
		limits:         make(map[string]int),
		shouldLimit:    query.Limit != 0,
		responseChan:   responseChan,
		seriesToPoints: make(map[string]*protocol.Series),
	}

	// emit forwards one series to the response channel; it never fails.
	emit := func(s *protocol.Series) error {
		responseChan <- &protocol.Response{Type: &responseQuery, Series: s}
		return nil
	}

	switch {
	case query.HasAggregates():
		engine.executeCountQueryWithGroupBy(query, emit)
	case containsArithmeticOperators(query):
		engine.executeArithmeticQuery(query, emit)
	default:
		engine.distributeQuery(query, emit)
	}

	return engine
}
Beispiel #2
0
// ExecuteQuery runs query against every series it references in database.
// A referenced series may be a regex, in which case it is expanded to all
// matching series names first. Series the user cannot read are skipped, the
// remaining ones are still executed, and an error is returned at the end if
// at least one series was skipped. An error from executing a series aborts
// the whole query immediately.
func (self *LevelDbDatastore) ExecuteQuery(user common.User, database string,
	query *parser.SelectQuery, yield func(*protocol.Series) error,
	ringFilter func(database, series *string, time *int64) bool) error {

	denied := false

	// runIfAllowed executes the query for one concrete series name, or
	// records the denial and skips the series when the user lacks access.
	runIfAllowed := func(name string, columns []string) error {
		if !user.HasReadAccess(name) {
			denied = true
			return nil
		}
		return self.executeQueryForSeries(database, name, columns, query, yield, ringFilter)
	}

	for series, columns := range query.GetReferencedColumns() {
		regex, isRegex := series.GetCompiledRegex()
		if !isRegex {
			if err := runIfAllowed(series.Name, columns); err != nil {
				return err
			}
			continue
		}

		for _, name := range self.getSeriesForDbAndRegex(database, regex) {
			if err := runIfAllowed(name, columns); err != nil {
				return err
			}
		}
	}

	if denied {
		return fmt.Errorf("You don't have permission to access one or more time series")
	}
	return nil
}
Beispiel #3
0
// sendBatch yields series once per table alias the query uses for it and
// returns how many points were removed by filtering. When the series has
// several aliases the smallest per-alias drop count is returned. Inner-join
// queries are passed through unfiltered here. If the query reports no alias
// for the series, nothing is yielded and math.MaxInt32 is returned.
func (self *LevelDbDatastore) sendBatch(query *parser.SelectQuery, series *protocol.Series, yield func(series *protocol.Series) error) (int, error) {
	minDropped := int(math.MaxInt32)

	for _, alias := range query.GetTableAliases(*series.Name) {
		// Copy the loop variable so every yielded series owns its name.
		aliasName := alias
		aliased := &protocol.Series{Name: &aliasName, Points: series.Points, Fields: series.Fields}
		before := len(aliased.Points)

		out := aliased
		if query.GetFromClause().Type != parser.FromClauseInnerJoin {
			filtered, err := Filter(query, aliased)
			if err != nil {
				return 0, err
			}
			out = filtered
		}

		if n := before - len(out.Points); n < minDropped {
			minDropped = n
		}

		if err := yield(out); err != nil {
			return 0, err
		}
	}

	return minDropped, nil
}
Beispiel #4
0
// containsArithmeticOperators reports whether at least one selected column
// of query has the type parser.ValueExpression.
func containsArithmeticOperators(query *parser.SelectQuery) bool {
	columns := query.GetColumnNames()
	for i := 0; i < len(columns); i++ {
		if columns[i].Type == parser.ValueExpression {
			return true
		}
	}
	return false
}
Beispiel #5
0
// isAggregateQuery reports whether at least one selected column of query is
// a function call.
func isAggregateQuery(query *parser.SelectQuery) bool {
	columns := query.GetColumnNames()
	for i := 0; i < len(columns); i++ {
		if columns[i].IsFunctionCall() {
			return true
		}
	}
	return false
}
Beispiel #6
0
// Filter applies the query's where condition and column selection to series.
//
// The series is modified in place and returned: points that do not match the
// where condition are removed, matching points are passed to filterColumns
// with the set of selected columns, and unless the selection contains "*"
// series.Fields is reduced to the selected columns. For inner-join queries
// the selected columns are qualified as "<table>.<column>". When the query
// has no where condition the series is returned untouched.
//
// An error from evaluating the where condition aborts filtering; the result
// is then nil and series has already lost its Points.
func Filter(query *parser.SelectQuery, series *protocol.Series) (*protocol.Series, error) {
	where := query.GetWhereCondition()
	if where == nil {
		return series, nil
	}

	columns := map[string]struct{}{}
	if query.GetFromClause().Type == parser.FromClauseInnerJoin {
	outer:
		for t, cs := range query.GetResultColumns() {
			for _, c := range cs {
				// if this is a wildcard select, then drop all columns and
				// just use '*'
				if c == "*" {
					columns = make(map[string]struct{}, 1)
					columns[c] = struct{}{}
					break outer
				}
				columns[t.Name+"."+c] = struct{}{}
			}
		}
	} else {
		for _, cs := range query.GetResultColumns() {
			for _, c := range cs {
				columns[c] = struct{}{}
			}
		}
	}

	// Collect the surviving points in a fresh slice instead of reusing the
	// input's backing array: sendBatch hands the same Points slice to Filter
	// once per alias, so the input must stay intact.
	points := series.Points
	series.Points = nil
	for _, point := range points {
		ok, err := matches(where, series.Fields, point)
		if err != nil {
			return nil, err
		}
		if !ok {
			continue
		}

		filterColumns(columns, series.Fields, point)
		series.Points = append(series.Points, point)
	}

	if _, wildcard := columns["*"]; !wildcard {
		newFields := make([]string, 0, len(series.Fields))
		for _, f := range series.Fields {
			if _, selected := columns[f]; selected {
				newFields = append(newFields, f)
			}
		}
		series.Fields = newFields
	}
	return series, nil
}
Beispiel #7
0
// executeArithmeticQuery distributes query and rewrites every series that
// comes back so that it carries one column per selected value: simple names
// and function calls keep their name, expressions use their alias or, when
// they have none, "expr<index>" where index is the position in the select
// list.
//
// Output columns follow the order of the select list (first occurrence wins
// for the position, last occurrence wins for the value when a name repeats),
// so every batch of a series has the same column layout. Series without
// points are forwarded unchanged. Errors from value computation and from
// yield are returned to the caller.
func (self *QueryEngine) executeArithmeticQuery(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	names := map[string]*parser.Value{}
	// fields records the output column names in select-list order; ranging
	// over the names map would produce a different order on every call.
	var fields []string
	for idx, v := range query.GetColumnNames() {
		var name string
		switch v.Type {
		case parser.ValueSimpleName, parser.ValueFunctionCall:
			name = v.Name
		case parser.ValueExpression:
			name = v.Alias
			if name == "" {
				name = "expr" + strconv.Itoa(idx)
			}
		default:
			// other value types do not produce an output column
			continue
		}

		if _, seen := names[name]; !seen {
			fields = append(fields, name)
		}
		names[name] = v
	}

	return self.distributeQuery(query, func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return yield(series)
		}

		newSeries := &protocol.Series{
			Name: series.Name,
			// every yielded series gets its own copy of the column names
			Fields: append([]string(nil), fields...),
		}

		for _, point := range series.Points {
			newPoint := &protocol.Point{
				Timestamp:      point.Timestamp,
				SequenceNumber: point.SequenceNumber,
			}
			for _, field := range newSeries.Fields {
				v, err := GetValue(names[field], series.Fields, point)
				if err != nil {
					log.Error("Error in arithmetic computation: %s", err)
					return err
				}
				newPoint.Values = append(newPoint.Values, v)
			}
			newSeries.Points = append(newSeries.Points, newPoint)
		}

		return yield(newSeries)
	})
}
Beispiel #8
0
// getJoinYield wraps yield for an inner join of the first two tables of the
// query's from clause. The returned function is the result of mergeYield over
// both table aliases with a callback that remembers the last point seen for
// each table. As soon as a point is available for both tables, the callback
// builds a single-point series named "<table1>_join_<table2>" whose fields
// are prefixed with their table alias and whose timestamp is taken from the
// second table's point. That series is run through Filter and yielded only
// if the point survives. Both remembered points are cleared afterwards.
//
// Series without points are ignored, and an error returned by Filter is
// propagated to the caller.
func getJoinYield(query *parser.SelectQuery, yield func(*protocol.Series) error) func(*protocol.Series) error {
	var lastPoint1 *protocol.Point
	var lastFields1 []string
	var lastPoint2 *protocol.Point
	var lastFields2 []string

	table1 := query.GetFromClause().Names[0].GetAlias()
	table2 := query.GetFromClause().Names[1].GetAlias()
	name := table1 + "_join_" + table2

	return mergeYield(table1, table2, false, query.Ascending, func(s *protocol.Series) error {
		// There is no "last point" to remember in an empty series; indexing
		// it below would panic.
		if len(s.Points) == 0 {
			return nil
		}

		if *s.Name == table1 {
			lastPoint1 = s.Points[len(s.Points)-1]
			if lastFields1 == nil {
				for _, f := range s.Fields {
					lastFields1 = append(lastFields1, table1+"."+f)
				}
			}
		}

		if *s.Name == table2 {
			lastPoint2 = s.Points[len(s.Points)-1]
			if lastFields2 == nil {
				for _, f := range s.Fields {
					lastFields2 = append(lastFields2, table2+"."+f)
				}
			}
		}

		if lastPoint1 == nil || lastPoint2 == nil {
			return nil
		}

		// Build the joined fields and values in fresh slices. Appending
		// directly to lastFields1 or lastPoint1.Values could write into
		// spare capacity of slices that are kept and reused.
		fields := make([]string, 0, len(lastFields1)+len(lastFields2))
		fields = append(fields, lastFields1...)
		fields = append(fields, lastFields2...)

		values := make([]*protocol.FieldValue, 0, len(lastPoint1.Values)+len(lastPoint2.Values))
		values = append(values, lastPoint1.Values...)
		values = append(values, lastPoint2.Values...)

		newSeries := &protocol.Series{
			Name:   &name,
			Fields: fields,
			Points: []*protocol.Point{
				&protocol.Point{
					Values:    values,
					Timestamp: lastPoint2.Timestamp,
				},
			},
		}

		lastPoint1 = nil
		lastPoint2 = nil

		filteredSeries, err := Filter(query, newSeries)
		if err != nil {
			return err
		}
		if len(filteredSeries.Points) > 0 {
			return yield(newSeries)
		}
		return nil
	})
}
Beispiel #9
0
// DistributeQuery runs query for db on behalf of user and passes the
// resulting series to yield.
//
// On a single-server cluster, or when localOnly is set, the query is executed
// directly against the local datastore. Otherwise a query request is sent to
// every remote server returned by GetServersToMakeQueryTo, the local part is
// executed in a separate goroutine, and all response channels are handed to
// streamResultsFromChannels, which is responsible for combining them.
//
// If the local server is not among the servers to query, no local execution
// takes place and the local channel only carries the end-of-stream marker.
//
// TODO: make this work even if there is a downed server in the cluster
func (self *CoordinatorImpl) DistributeQuery(user common.User, db string, query *parser.SelectQuery, localOnly bool, yield func(*protocol.Series) error) error {
	if self.clusterConfiguration.IsSingleServer() || localOnly {
		return self.datastore.ExecuteQuery(user, db, query, yield, nil)
	}
	servers, replicationFactor := self.clusterConfiguration.GetServersToMakeQueryTo(&db)
	id := atomic.AddUint32(&self.requestId, uint32(1))
	userName := user.GetName()
	isDbUser := !user.IsClusterAdmin()
	responseChannels := make([]chan *protocol.Response, 0, len(servers)+1)
	queryString := query.GetQueryString()
	var localServerToQuery *serverToQuery
	for _, server := range servers {
		if server.server.Id == self.clusterConfiguration.localServerId {
			localServerToQuery = server
			continue
		}

		request := &protocol.Request{Type: &queryRequest, Query: &queryString, Id: &id, Database: &db, UserName: &userName, IsDbUser: &isDbUser}
		// only restrict the ring locations when the server should not
		// answer for all of them
		if server.ringLocationsToQuery != replicationFactor {
			r := server.ringLocationsToQuery
			request.RingLocationsToQuery = &r
		}
		responseChan := make(chan *protocol.Response, 3)
		server.server.MakeRequest(request, responseChan)
		responseChannels = append(responseChannels, responseChan)
	}

	local := make(chan *protocol.Response)
	nextPointMap := make(map[string]*NextPoint)

	// TODO: this style of wrapping the series in response objects with the
	//       last point time is duplicated in the request handler. Refactor...
	sendFromLocal := func(series *protocol.Series) error {
		response := createResponse(nextPointMap, series, nil)
		local <- response
		return nil
	}
	responseChannels = append(responseChannels, local)
	// TODO: wire up the willreturnsingleseries method and uncomment this line and delete the next one.
	//	isSingleSeriesQuery := query.WillReturnSingleSeries()
	isSingleSeriesQuery := false

	go func() {
		// localServerToQuery stays nil when the local server is not one of
		// the query targets; dereferencing it would panic in this goroutine.
		if localServerToQuery != nil {
			var ringFilter func(database, series *string, time *int64) bool
			if replicationFactor != localServerToQuery.ringLocationsToQuery {
				ringFilter = self.clusterConfiguration.GetRingFilterFunction(db, localServerToQuery.ringLocationsToQuery)
			}
			// NOTE(review): an error from the local execution is dropped
			// here; there is no visible way to report it on the channel.
			self.datastore.ExecuteQuery(user, db, query, sendFromLocal, ringFilter)
		}
		// The local channel is always terminated so the reader is not left
		// waiting for it.
		local <- &protocol.Response{Type: &endStreamResponse}
		close(local)
	}()
	self.streamResultsFromChannels(isSingleSeriesQuery, query.Ascending, responseChannels, yield)
	return nil
}
Beispiel #10
0
// distributeQuery hands the query to the coordinator. For merge and
// inner-join from clauses, yield is first wrapped so that the merge or join
// happens before the points reach the original yield.
func (self *QueryEngine) distributeQuery(user common.User, database string, query *parser.SelectQuery, yield func(*protocol.Series) error) (err error) {
	from := query.GetFromClause()

	switch from.Type {
	case parser.FromClauseMerge:
		// merge the two named series
		yield = getMergeYield(from.Names[0].Name.Name, from.Names[1].Name.Name, query.Ascending, yield)
	case parser.FromClauseInnerJoin:
		yield = getJoinYield(query, yield)
	}

	return self.coordinator.DistributeQuery(user, database, query, yield)
}
Beispiel #11
0
// distributeQuery stores the yield function the engine should use for
// incoming series. For merge and inner-join from clauses, yield is first
// wrapped so that the merge or join happens before the points reach the
// original yield. It always returns nil.
func (self *QueryEngine) distributeQuery(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	from := query.GetFromClause()

	switch from.Type {
	case parser.FromClauseMerge:
		// merge the two named series
		yield = getMergeYield(from.Names[0].Name.Name, from.Names[1].Name.Name, query.Ascending, yield)
	case parser.FromClauseInnerJoin:
		yield = getJoinYield(query, yield)
	}

	self.yield = yield
	return nil
}
Beispiel #12
0
// runContinuousQuery executes the continuous query for the window between
// start and end as the first cluster admin. Every series the query produces
// is committed to the query's into-target through the coordinator's
// InterpolateValuesAndCommit.
func (s *RaftServer) runContinuousQuery(db string, query *parser.SelectQuery, start time.Time, end time.Time) {
	admins := s.clusterConfig.GetClusterAdmins()
	user := s.clusterConfig.GetClusterAdmin(admins[0])
	target := query.GetIntoClause().Target.Name
	sql := query.GetQueryStringForContinuousQuery(start, end)

	// commit stores one result series under the target name.
	commit := func(series *protocol.Series) error {
		return s.coordinator.InterpolateValuesAndCommit(db, series, target, true)
	}

	s.coordinator.RunQuery(user, db, sql, NewContinuousQueryWriter(commit))
}
Beispiel #13
0
// executeCountQueryWithGroupBy prepares the engine for an aggregate query and
// then distributes it. It records yield as the aggregate yield, creates one
// aggregator per function call in the select list (looked up by lower-cased
// name in registeredAggregators), collects the non-function group-by elements
// and initializes the output fields. Incoming series with at least one point
// are passed to aggregateValuesForSeries; empty series are ignored.
//
// Unknown functions and aggregator initialization failures are reported as
// InvalidArgument query errors.
func (self *QueryEngine) executeCountQueryWithGroupBy(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	self.aggregateYield = yield

	groupByTime, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return err
	}

	self.isAggregateQuery = true
	self.duration = groupByTime
	self.aggregators = []Aggregator{}

	for _, column := range query.GetColumnNames() {
		if column.IsFunctionCall() {
			newAggregator := registeredAggregators[strings.ToLower(column.Name)]
			if newAggregator == nil {
				return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("Unknown function %s", column.Name))
			}

			agg, initErr := newAggregator(query, column, query.GetGroupByClause().FillValue)
			if initErr != nil {
				return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("%s", initErr))
			}
			self.aggregators = append(self.aggregators, agg)
		}
	}

	// only plain group-by elements are kept; function calls are skipped
	for _, elem := range query.GetGroupByClause().Elems {
		if !elem.IsFunctionCall() {
			self.elems = append(self.elems, elem)
		}
	}

	self.fillWithZero = query.GetGroupByClause().FillWithZero
	self.initializeFields()

	return self.distributeQuery(query, func(series *protocol.Series) error {
		if len(series.Points) > 0 {
			return self.aggregateValuesForSeries(series)
		}
		return nil
	})
}
Beispiel #14
0
// runContinuousQuery executes the continuous query for the window between
// start and end as the "root" cluster admin. Every resulting series is
// renamed to the into-target, with ":series_name" replaced by the source
// series name, all its points get sequence number 1, and the series is
// written back through the coordinator.
func (s *RaftServer) runContinuousQuery(db string, query *parser.SelectQuery, start time.Time, end time.Time) {
	clusterAdmin := s.clusterConfig.clusterAdmins["root"]
	targetName := query.GetIntoClause().Target.Name
	sequenceNumber := uint64(1)
	queryString := query.GetQueryStringForContinuousQuery(start, end)

	// writeSeries renames one result series and stores it.
	writeSeries := func(series *protocol.Series) error {
		renamed := strings.Replace(targetName, ":series_name", *series.Name, -1)
		series.Name = &renamed

		// all points share a pointer to the same sequence number variable
		for i := range series.Points {
			series.Points[i].SequenceNumber = &sequenceNumber
		}

		return s.coordinator.WriteSeriesData(clusterAdmin, db, series)
	}

	s.engine.RunQuery(clusterAdmin, db, queryString, false, writeSeries)
}
Beispiel #15
0
// NewQueryEngine builds a QueryEngine for query and sets it up through the
// matching execution path: aggregate, arithmetic or plain. Every series the
// engine yields is wrapped in a protocol.Response and sent on responseChan.
//
// For aggregate queries the limit is disabled here so that the coordinator
// can apply it. For EXPLAIN queries the engine additionally records its start
// time in milliseconds and counts the points it writes. If the setup fails,
// the error is returned and no engine is created.
func NewQueryEngine(query *parser.SelectQuery, responseChan chan *protocol.Response) (*QueryEngine, error) {
	limit := query.Limit
	if query.HasAggregates() {
		// let the coordinator deal with the limit
		limit = 0
	}

	// The statistics fields (run times, point counters, shard info) start
	// at their zero values.
	engine := &QueryEngine{
		query:          query,
		where:          query.GetWhereCondition(),
		limiter:        NewLimiter(limit),
		responseChan:   responseChan,
		seriesToPoints: make(map[string]*protocol.Series),
		explain:        query.IsExplainQuery(),
	}

	if engine.explain {
		engine.runStartTime = float64(time.Now().UnixNano()) / float64(time.Millisecond)
	}

	// emit forwards one series to the response channel; it never fails.
	emit := func(series *protocol.Series) error {
		if engine.explain {
			//TODO: We may not have to send points, just count them
			engine.pointsWritten += int64(len(series.Points))
		}

		responseChan <- &protocol.Response{Type: &queryResponse, Series: series}
		return nil
	}

	var err error
	switch {
	case query.HasAggregates():
		err = engine.executeCountQueryWithGroupBy(query, emit)
	case containsArithmeticOperators(query):
		err = engine.executeArithmeticQuery(query, emit)
	default:
		err = engine.distributeQuery(query, emit)
	}
	if err != nil {
		return nil, err
	}

	return engine, nil
}
Beispiel #16
0
// NewTimestampAggregator creates the aggregator that tracks timestamps per
// series and group. When the query groups by time, the group-by duration is
// stored in microseconds; otherwise the aggregator has no duration. The
// second argument is unused.
func NewTimestampAggregator(query *parser.SelectQuery, _ *parser.Value) (Aggregator, error) {
	groupByTime, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return nil, err
	}

	aggregator := &TimestampAggregator{
		timestamps: make(map[string]map[interface{}]int64),
	}
	if groupByTime != nil {
		micros := int64(*groupByTime / time.Microsecond)
		aggregator.duration = &micros
	}

	return aggregator, nil
}
Beispiel #17
0
// fetchSinglePoint looks up the single point addressed by the query's start
// time and sequence number and returns it as a one-point series named series.
//
// For every field a key made of the field id, the encoded timestamp and the
// encoded sequence number (both big-endian) is read from the database. Only
// fields that have a stored value contribute a column and a value; fields
// without data are skipped. The returned series always contains exactly one
// point, even when no field had a value.
//
// Errors from determining the sequence number, encoding the key, reading the
// database or decoding a stored value are returned.
func (self *LevelDbDatastore) fetchSinglePoint(database, series string, fields []*Field,
	query *parser.SelectQuery) (*protocol.Series, error) {
	fieldCount := len(fields)
	fieldNames := make([]string, 0, fieldCount)
	point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
	timestamp := common.TimeToMicroseconds(query.GetStartTime())
	sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
	if err != nil {
		return nil, err
	}

	// keySuffix holds the part of the key shared by all fields: the encoded
	// timestamp followed by the encoded sequence number.
	keySuffix := bytes.NewBuffer(make([]byte, 0, 16))
	if err := binary.Write(keySuffix, binary.BigEndian, self.convertTimestampToUint(&timestamp)); err != nil {
		return nil, err
	}
	if err := binary.Write(keySuffix, binary.BigEndian, sequenceNumber); err != nil {
		return nil, err
	}

	sequenceNumber_uint64 := uint64(sequenceNumber)
	point.SequenceNumber = &sequenceNumber_uint64
	point.SetTimestampInMicroseconds(timestamp)

	for _, field := range fields {
		// Build the key in a fresh slice. Appending directly to field.Id
		// could write into spare capacity of the field's own backing array.
		pointKey := make([]byte, 0, len(field.Id)+keySuffix.Len())
		pointKey = append(pointKey, field.Id...)
		pointKey = append(pointKey, keySuffix.Bytes()...)

		data, err := self.db.Get(self.readOptions, pointKey)
		if err != nil {
			return nil, err
		}
		// no value stored for this field at the requested point
		if data == nil {
			continue
		}

		fieldValue := &protocol.FieldValue{}
		if err := proto.Unmarshal(data, fieldValue); err != nil {
			return nil, err
		}
		fieldNames = append(fieldNames, field.Name)
		point.Values = append(point.Values, fieldValue)
	}

	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: []*protocol.Point{point}}

	return result, nil
}
Beispiel #18
0
// Filter applies the query's where condition and column selection to series.
//
// The series is modified in place and returned: points that do not match the
// where condition are removed, matching points are passed to filterColumns
// with the columns collected from the select list and the group-by clause,
// and unless "*" is among those columns series.Fields is reduced to the
// collected columns. When the query has no where condition the series is
// returned untouched. An error from evaluating the where condition aborts
// filtering with a nil result.
func Filter(query *parser.SelectQuery, series *protocol.Series) (*protocol.Series, error) {
	if query.GetWhereCondition() == nil {
		return series, nil
	}

	wanted := map[string]bool{}
	getColumns(query.GetColumnNames(), wanted)
	getColumns(query.GetGroupByClause().Elems, wanted)

	candidates := series.Points
	series.Points = nil
	for i := range candidates {
		candidate := candidates[i]

		matched, err := matches(query.GetWhereCondition(), series.Fields, candidate)
		if err != nil {
			return nil, err
		}
		if !matched {
			continue
		}

		filterColumns(wanted, series.Fields, candidate)
		series.Points = append(series.Points, candidate)
	}

	// a wildcard select keeps every field
	if wanted["*"] {
		return series, nil
	}

	kept := []string{}
	for _, field := range series.Fields {
		if _, present := wanted[field]; present {
			kept = append(kept, field)
		}
	}
	series.Fields = kept
	return series, nil
}
Beispiel #19
0
// DistributeQuery runs query for db on behalf of user and passes the
// resulting series to yield.
//
// On a single-server cluster the query is executed directly against the
// local datastore. Otherwise a query request is sent to every remote server
// returned by GetServersToMakeQueryTo, the local part is executed in a
// separate goroutine, and all response channels are handed to
// streamResultsFromChannels, which is responsible for combining them.
//
// Local results are sent one point "behind": the last point of every batch
// is held back per series and announced as NextPointTime of the response,
// then placed at the front of the following batch of that series.
//
// If the local server is not among the servers to query, no local execution
// takes place and the local channel only carries the end-of-stream marker.
//
// TODO: make this work even if there is a downed server in the cluster
func (self *CoordinatorImpl) DistributeQuery(user common.User, db string, query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	if self.clusterConfiguration.IsSingleServer() {
		return self.datastore.ExecuteQuery(user, db, query, yield, nil)
	}
	servers, replicationFactor := self.clusterConfiguration.GetServersToMakeQueryTo(self.localHostId, &db)
	queryString := query.GetQueryString()
	id := atomic.AddUint32(&self.requestId, uint32(1))
	userName := user.GetName()
	responseChannels := make([]chan *protocol.Response, 0, len(servers)+1)
	var localServerToQuery *serverToQuery
	for _, server := range servers {
		if server.server.Id == self.localHostId {
			localServerToQuery = server
			continue
		}

		request := &protocol.Request{Type: &queryRequest, Query: &queryString, Id: &id, Database: &db, UserName: &userName}
		// only restrict the ring locations when the server should not
		// answer for all of them
		if server.ringLocationsToQuery != replicationFactor {
			r := server.ringLocationsToQuery
			request.RingLocationsToQuery = &r
		}
		responseChan := make(chan *protocol.Response, 3)
		server.server.protobufClient.MakeRequest(request, responseChan)
		responseChannels = append(responseChannels, responseChan)
	}

	local := make(chan *protocol.Response)
	// nextPointMap holds, per series name, the point held back from the
	// previous batch.
	nextPointMap := make(map[string]*protocol.Point)

	// TODO: this style of wrapping the series in response objects with the
	//       last point time is duplicated in the request handler. Refactor...
	sendFromLocal := func(series *protocol.Series) error {
		pointCount := len(series.Points)
		if pointCount == 0 {
			// an empty batch flushes the held-back point, if there is one
			if nextPoint := nextPointMap[*series.Name]; nextPoint != nil {
				series.Points = append(series.Points, nextPoint)
			}

			local <- &protocol.Response{Type: &queryResponse, Series: series}
			return nil
		}
		oldNextPoint := nextPointMap[*series.Name]
		// hold back the last point of this batch
		nextPoint := series.Points[pointCount-1]
		series.Points[pointCount-1] = nil
		if oldNextPoint != nil {
			// shift everything one slot to the right (overwriting the
			// cleared last slot) and put the previously held-back point
			// in front
			copy(series.Points[1:], series.Points[0:])
			series.Points[0] = oldNextPoint
		} else {
			series.Points = series.Points[:len(series.Points)-1]
		}

		response := &protocol.Response{Series: series, Type: &queryResponse}
		if nextPoint != nil {
			nextPointMap[*series.Name] = nextPoint
			response.NextPointTime = nextPoint.Timestamp
		}
		local <- response
		return nil
	}
	responseChannels = append(responseChannels, local)
	// TODO: wire up the willreturnsingleseries method and uncomment this line and delete the next one.
	//	isSingleSeriesQuery := query.WillReturnSingleSeries()
	isSingleSeriesQuery := false

	go func() {
		// localServerToQuery stays nil when the local server is not one of
		// the query targets; dereferencing it would panic in this goroutine.
		if localServerToQuery != nil {
			var ringFilter func(database, series *string, time *int64) bool
			if replicationFactor != localServerToQuery.ringLocationsToQuery {
				ringFilter = self.clusterConfiguration.GetRingFilterFunction(db, localServerToQuery.ringLocationsToQuery)
			}
			// NOTE(review): an error from the local execution is dropped
			// here; there is no visible way to report it on the channel.
			self.datastore.ExecuteQuery(user, db, query, sendFromLocal, ringFilter)
		}
		// The local channel is always terminated so the reader is not left
		// waiting for it.
		local <- &protocol.Response{Type: &endStreamResponse}
		close(local)
	}()
	self.streamResultsFromChannels(isSingleSeriesQuery, query.Ascending, responseChannels, yield)
	return nil
}
Beispiel #20
0
// NewPointFilter creates a PointFilter for query. The filter's column set is
// collected from the select list and the group-by elements, and its where
// condition is the query's where condition. queryColumnNames is stored as
// given.
func NewPointFilter(query *parser.SelectQuery, queryColumnNames []string) *PointFilter {
	selected := make(map[string]bool)
	getColumns(query.GetColumnNames(), selected)
	getColumns(query.GetGroupByClause().Elems, selected)

	filter := &PointFilter{
		columns:          selected,
		queryColumnNames: queryColumnNames,
		where:            query.GetWhereCondition(),
	}
	return filter
}
Beispiel #21
0
// executeQueryForSeries reads the points of one series of database that fall
// between the query's start and end time and hands them to yield in batches.
//
// The requested columns are resolved to fields; one iterator per field is
// walked in the query's direction and the per-field values that share the
// same timestamp and sequence number are combined into one point. Fields
// without a value at that position are marked as null. Batches are sent
// through sendBatch whenever the accumulated size exceeds MAX_SERIES_SIZE or
// the limit is reached. After the loop the remaining batch is sent, followed
// by an empty series.
//
// If the fields cannot be resolved because of a FieldLookupError, a single
// empty series is yielded instead. Single-point queries are answered through
// fetchSinglePoint. A non-nil ringFilter that returns true for a point causes
// that point to be skipped.
//
// NOTE(review): the iterators returned by getIterators are not released in
// this function — confirm who owns them.
func (self *LevelDbDatastore) executeQueryForSeries(database, series string, columns []string,
	query *parser.SelectQuery, yield func(*protocol.Series) error,
	ringFilter func(database, series *string, time *int64) bool) error {

	startTimeBytes, endTimeBytes := self.byteArraysForStartAndEndTimes(common.TimeToMicroseconds(query.GetStartTime()), common.TimeToMicroseconds(query.GetEndTime()))
	emptyResult := &protocol.Series{Name: &series, Points: nil}

	fields, err := self.getFieldsForSeries(database, series, columns)
	if err != nil {
		// because a db is distributed across the cluster, it's possible we don't have the series indexed here. ignore
		switch err := err.(type) {
		case FieldLookupError:
			return yield(emptyResult)
		default:
			return err
		}
	}

	fieldCount := len(fields)
	// rawColumnValues[i] buffers the value read from iterator i that has not
	// been emitted as part of a point yet (nil when there is none).
	rawColumnValues := make([]*rawColumnValue, fieldCount, fieldCount)

	if query.IsSinglePointQuery() {
		result, err := self.fetchSinglePoint(database, series, fields, query)
		if err != nil {
			return err
		}

		if err := yield(result); err != nil {
			return err
		}
		return nil
	}

	fieldNames, iterators := self.getIterators(fields, startTimeBytes, endTimeBytes, query.Ascending)
	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}

	// A query limit of 0 means "no limit"; limit is then set to -1 and only
	// counts down without ever being checked.
	limit := query.Limit
	shouldLimit := true
	if limit == 0 {
		limit = -1
		shouldLimit = false
	}
	// resultByteCount approximates the size of the current batch.
	resultByteCount := 0

	// TODO: clean up, this is super gnarly
	// optimize for the case where we're pulling back only a single column or aggregate
	for {
		isValid := false
		point := &protocol.Point{Values: make([]*protocol.FieldValue, fieldCount, fieldCount)}

		// Step 1: refill the buffer slot of every iterator that has no
		// pending value and is still valid.
		for i, it := range iterators {
			if rawColumnValues[i] != nil || !it.Valid() {
				continue
			}

			key := it.Key()
			// keys shorter than 16 bytes cannot hold the time at [8:16]
			if len(key) < 16 {
				continue
			}

			if !isPointInRange(fields[i].Id, startTimeBytes, endTimeBytes, key) {
				continue
			}

			value := it.Value()
			// key layout as used here: bytes 8..15 are the time, everything
			// from byte 16 on is the sequence number (the first 8 bytes are
			// presumably the field id — TODO confirm)
			sequenceNumber := key[16:]

			rawTime := key[8:16]
			rawValue := &rawColumnValue{time: rawTime, sequence: sequenceNumber, value: value}
			rawColumnValues[i] = rawValue
		}

		var pointTimeRaw []byte
		var pointSequenceRaw []byte
		// choose the highest (or lowest in case of ascending queries) timestamp
		// and sequence number. that will become the timestamp and sequence of
		// the next point.
		for _, value := range rawColumnValues {
			if value == nil {
				continue
			}

			pointTimeRaw, pointSequenceRaw = value.updatePointTimeAndSequence(pointTimeRaw,
				pointSequenceRaw, query.Ascending)
		}

		// Step 3: move every buffered value that belongs to the chosen
		// timestamp/sequence into the point and advance its iterator.
		for i, iterator := range iterators {
			// if the value is nil or doesn't match the point's timestamp and sequence number
			// then skip it
			if rawColumnValues[i] == nil ||
				!bytes.Equal(rawColumnValues[i].time, pointTimeRaw) ||
				!bytes.Equal(rawColumnValues[i].sequence, pointSequenceRaw) {

				point.Values[i] = &protocol.FieldValue{IsNull: &TRUE}
				continue
			}

			// if we emitted at least one column, then we should keep
			// trying to get more points
			isValid = true

			// advance the iterator to read a new value in the next iteration
			if query.Ascending {
				iterator.Next()
			} else {
				iterator.Prev()
			}

			fv := &protocol.FieldValue{}
			resultByteCount += len(rawColumnValues[i].value)
			err := proto.Unmarshal(rawColumnValues[i].value, fv)
			if err != nil {
				return err
			}
			point.Values[i] = fv
			rawColumnValues[i] = nil
		}

		var sequence uint64
		// set the point sequence number and timestamp
		// (the errors returned by binary.Read are not checked; when no column
		// was emitted the raw slices are nil and the loop exits right below)
		binary.Read(bytes.NewBuffer(pointSequenceRaw), binary.BigEndian, &sequence)
		var t uint64
		binary.Read(bytes.NewBuffer(pointTimeRaw), binary.BigEndian, &t)
		// note: the local name "time" hides any package of that name for the
		// rest of this loop body
		time := self.convertUintTimestampToInt64(&t)
		point.SetTimestampInMicroseconds(time)
		point.SequenceNumber = &sequence

		// stop the loop if we ran out of points
		if !isValid {
			break
		}

		// NOTE(review): the limit is decremented before the ring filter is
		// consulted, so points skipped by the filter still count against the
		// limit, and the limit checks below are not reached for them —
		// confirm this is intended.
		limit -= 1

		if ringFilter != nil && ringFilter(&database, &series, point.Timestamp) {
			continue
		}

		result.Points = append(result.Points, point)

		// add byte count for the timestamp and the sequence
		resultByteCount += 16

		// check if we should send the batch along
		if resultByteCount > MAX_SERIES_SIZE || (shouldLimit && limit == 0) {
			dropped, err := self.sendBatch(query, result, yield)
			if err != nil {
				return err
			}
			// points dropped by filtering do not count against the limit
			limit += dropped
			resultByteCount = 0
			result = &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}
		}
		if shouldLimit && limit < 1 {
			break
		}
	}
	// flush whatever is left, then send an empty series
	if _, err := self.sendBatch(query, result, yield); err != nil {
		return err
	}
	_, err = self.sendBatch(query, emptyResult, yield)
	return err
}
Beispiel #22
0
// executeCountQueryWithGroupBy prepares the engine for an aggregate query and
// then distributes it. It records yield as the aggregate yield, creates one
// aggregator per function call in the select list (looked up by lower-cased
// name in registeredAggregators) plus a timestamp aggregator, and resets the
// per-series group and time-range bookkeeping.
//
// Every non-empty series that comes in is fed point by point to all
// aggregators and to the timestamp aggregator; the group of each point is
// recorded and the series' time range is extended. Errors from any
// aggregator, including the timestamp aggregator, stop the processing of the
// series and are returned from the callback.
func (self *QueryEngine) executeCountQueryWithGroupBy(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	self.aggregateYield = yield
	duration, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return err
	}

	self.isAggregateQuery = true
	self.duration = duration
	self.aggregators = []Aggregator{}

	for _, value := range query.GetColumnNames() {
		if !value.IsFunctionCall() {
			continue
		}
		lowerCaseName := strings.ToLower(value.Name)
		initializer := registeredAggregators[lowerCaseName]
		if initializer == nil {
			return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("Unknown function %s", value.Name))
		}
		aggregator, err := initializer(query, value, query.GetGroupByClause().FillValue)
		if err != nil {
			return err
		}
		self.aggregators = append(self.aggregators, aggregator)
	}
	timestampAggregator, err := NewTimestampAggregator(query, nil)
	if err != nil {
		return err
	}
	self.timestampAggregator = timestampAggregator
	self.groups = make(map[string]map[Group]bool)
	self.pointsRange = make(map[string]*PointRange)
	self.groupBy = query.GetGroupByClause()

	return self.distributeQuery(query, func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return nil
		}

		// err is local to the callback on purpose: the callback must not
		// write to the enclosing function's err variable.
		mapper, err := createValuesToInterface(self.groupBy, series.Fields)
		if err != nil {
			return err
		}

		for _, aggregator := range self.aggregators {
			if err := aggregator.InitializeFieldsMetadata(series); err != nil {
				return err
			}
		}

		currentRange := self.pointsRange[*series.Name]
		for _, point := range series.Points {
			// value identifies the group the point belongs to
			value := mapper(point)
			for _, aggregator := range self.aggregators {
				if err := aggregator.AggregatePoint(*series.Name, value, point); err != nil {
					return err
				}
			}

			if err := self.timestampAggregator.AggregatePoint(*series.Name, value, point); err != nil {
				return err
			}

			seriesGroups := self.groups[*series.Name]
			if seriesGroups == nil {
				seriesGroups = make(map[Group]bool)
				self.groups[*series.Name] = seriesGroups
			}
			seriesGroups[value] = true

			if currentRange == nil {
				currentRange = &PointRange{*point.Timestamp, *point.Timestamp}
				self.pointsRange[*series.Name] = currentRange
			} else {
				currentRange.UpdateRange(point)
			}
		}

		return nil
	})
}
Beispiel #23
0
// NewFilteringEngine creates a FilteringEngine for query that forwards to
// processor. Filtering is enabled only when the query has a where condition.
func NewFilteringEngine(query *parser.SelectQuery, processor QueryProcessor) *FilteringEngine {
	hasWhere := false
	if query.GetWhereCondition() != nil {
		hasWhere = true
	}
	return &FilteringEngine{query, processor, hasWhere}
}
Beispiel #24
0
// executeCountQueryWithGroupBy runs an aggregate query for user on database
// and yields one result series per source series.
//
// Phase 1 creates one aggregator per function call in the select list (looked
// up by lower-cased name in registeredAggregators) plus a timestamp
// aggregator. Phase 2 distributes the query and feeds every point of every
// non-empty series to all aggregators, recording the group of each point and
// the time range of each series. Phase 3 builds the output: for each series
// the groups are sorted, the aggregator values of each group are combined via
// crossProduct, the group-by values are appended to every point, and the
// series is handed to yield.
//
// When the query groups by time and asks for fill-with-zero, a group is
// generated for every time bucket between the first and last point of the
// series, for each combination of the remaining group-by values.
//
// Errors from the aggregators (including the timestamp aggregator), from the
// query distribution and from yield are returned.
func (self *QueryEngine) executeCountQueryWithGroupBy(user common.User, database string, query *parser.SelectQuery,
	yield func(*protocol.Series) error) error {
	duration, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return err
	}

	aggregators := []Aggregator{}
	for _, value := range query.GetColumnNames() {
		if !value.IsFunctionCall() {
			continue
		}
		lowerCaseName := strings.ToLower(value.Name)
		initializer := registeredAggregators[lowerCaseName]
		if initializer == nil {
			return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("Unknown function %s", value.Name))
		}
		aggregator, err := initializer(query, value, query.GetGroupByClause().FillValue)
		if err != nil {
			return err
		}
		aggregators = append(aggregators, aggregator)
	}
	timestampAggregator, err := NewTimestampAggregator(query, nil)
	if err != nil {
		return err
	}

	groups := make(map[string]map[Group]bool)
	pointsRange := make(map[string]*PointRange)
	groupBy := query.GetGroupByClause()

	err = self.distributeQuery(user, database, query, func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return nil
		}

		// err is local to the callback on purpose: the callback must not
		// write to the enclosing function's err variable.
		mapper, err := createValuesToInterface(groupBy, series.Fields)
		if err != nil {
			return err
		}

		for _, aggregator := range aggregators {
			if err := aggregator.InitializeFieldsMetadata(series); err != nil {
				return err
			}
		}

		currentRange := pointsRange[*series.Name]
		for _, point := range series.Points {
			// value identifies the group the point belongs to
			value := mapper(point)
			for _, aggregator := range aggregators {
				if err := aggregator.AggregatePoint(*series.Name, value, point); err != nil {
					return err
				}
			}

			if err := timestampAggregator.AggregatePoint(*series.Name, value, point); err != nil {
				return err
			}

			seriesGroups := groups[*series.Name]
			if seriesGroups == nil {
				seriesGroups = make(map[Group]bool)
				groups[*series.Name] = seriesGroups
			}
			seriesGroups[value] = true

			if currentRange == nil {
				currentRange = &PointRange{*point.Timestamp, *point.Timestamp}
				pointsRange[*series.Name] = currentRange
			} else {
				currentRange.UpdateRange(point)
			}
		}

		return nil
	})

	if err != nil {
		return err
	}

	// output columns: aggregator columns first, then the plain group-by
	// elements
	fields := []string{}

	for _, aggregator := range aggregators {
		fields = append(fields, aggregator.ColumnNames()...)
	}

	for _, value := range groupBy.Elems {
		if value.IsFunctionCall() {
			continue
		}
		fields = append(fields, value.Name)
	}

	// buckets are only filled when the query groups by time
	fillWithZero := duration != nil && groupBy.FillWithZero

	for table, tableGroups := range groups {
		// copy the loop variable; its address ends up in the yielded series
		tempTable := table
		points := []*protocol.Point{}

		var _groups []Group

		if !fillWithZero {
			_groups = make([]Group, 0, len(tableGroups))
			for groupId := range tableGroups {
				_groups = append(_groups, groupId)
			}
		} else if timeRange, ok := pointsRange[table]; ok {
			// Round the first and last timestamp down to their bucket. The
			// factor 1000 converts between the timestamp unit and the
			// duration unit (presumably microseconds and nanoseconds —
			// TODO confirm).
			step := int64(*duration)
			first := timeRange.startTime * 1000 / step * step
			end := timeRange.endTime * 1000 / step * step

			groupsWithTime := map[Group]bool{}
			for timestamp := first; timestamp <= end; timestamp += step {
				for group := range tableGroups {
					groupWithTime := group.WithoutTimestamp().WithTimestamp(timestamp / 1000)
					groupsWithTime[groupWithTime] = true
				}
			}

			for groupId := range groupsWithTime {
				_groups = append(_groups, groupId)
			}
		}

		// Filled groups carry their own timestamp; otherwise the order comes
		// from the timestamp aggregator.
		var sortedGroups SortableGroups
		if fillWithZero {
			if query.Ascending {
				sortedGroups = &AscendingGroupTimestampSortableGroups{CommonSortableGroups{_groups, table}}
			} else {
				sortedGroups = &DescendingGroupTimestampSortableGroups{CommonSortableGroups{_groups, table}}
			}
		} else {
			if query.Ascending {
				sortedGroups = &AscendingAggregatorSortableGroups{CommonSortableGroups{_groups, table}, timestampAggregator}
			} else {
				sortedGroups = &DescendingAggregatorSortableGroups{CommonSortableGroups{_groups, table}, timestampAggregator}
			}
		}
		sort.Sort(sortedGroups)

		for _, groupId := range sortedGroups.GetSortedGroups() {
			var timestamp int64
			if groupId.HasTimestamp() {
				timestamp = groupId.GetTimestamp()
			} else {
				timestamp = *timestampAggregator.GetValues(table, groupId)[0][0].Int64Value
			}
			values := [][][]*protocol.FieldValue{}

			for _, aggregator := range aggregators {
				values = append(values, aggregator.GetValues(table, groupId))
			}

			// do cross product of all the values
			_values := crossProduct(values)

			for _, v := range _values {
				point := &protocol.Point{
					Values: v,
				}
				point.SetTimestampInMicroseconds(timestamp)

				// FIXME: this should be looking at the fields slice not the group by clause
				// FIXME: we should check whether the selected columns are in the group by clause
				for idx := range groupBy.Elems {
					// when grouping by time the first element is skipped
					if duration != nil && idx == 0 {
						continue
					}

					value := groupId.GetValue(idx)

					switch x := value.(type) {
					case string:
						point.Values = append(point.Values, &protocol.FieldValue{StringValue: &x})
					case bool:
						point.Values = append(point.Values, &protocol.FieldValue{BoolValue: &x})
					case float64:
						point.Values = append(point.Values, &protocol.FieldValue{DoubleValue: &x})
					case int64:
						point.Values = append(point.Values, &protocol.FieldValue{Int64Value: &x})
					case nil:
						point.Values = append(point.Values, nil)
					}
				}

				points = append(points, point)
			}
		}
		expectedData := &protocol.Series{
			Name:   &tempTable,
			Fields: fields,
			Points: points,
		}
		if err := yield(expectedData); err != nil {
			return err
		}
	}

	return nil
}
Beispiel #25
0
// executeCountQueryWithGroupBy configures the engine for an aggregate query
// and then feeds the query's points to the aggregators via distributeQuery.
//
// Setup performed, in order:
//   - yield is stored as self.aggregateYield;
//   - the group by time() duration is read from the group by clause and
//     stored in self.duration;
//   - one aggregator is built for every selected column that is a function
//     call, looked up by lower-cased name in registeredAggregators;
//   - the timestamp aggregator, the groups and pointsRange maps, the group by
//     clause and the fields are (re)initialized on the engine.
//
// It returns the error from reading the group by time, an InvalidArgument
// query error for an unknown function or a failing aggregator initializer,
// the error from NewTimestampAggregator, or whatever distributeQuery returns.
func (self *QueryEngine) executeCountQueryWithGroupBy(query *parser.SelectQuery, yield func(*protocol.Series) error) error {
	self.aggregateYield = yield

	groupByTime, err := query.GetGroupByClause().GetGroupByTime()
	if err != nil {
		return err
	}

	self.isAggregateQuery = true
	self.duration = groupByTime
	self.aggregators = []Aggregator{}

	// Only function-call columns produce aggregators; plain columns are skipped.
	for _, column := range query.GetColumnNames() {
		if !column.IsFunctionCall() {
			continue
		}

		newAggregator := registeredAggregators[strings.ToLower(column.Name)]
		if newAggregator == nil {
			return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("Unknown function %s", column.Name))
		}

		built, err := newAggregator(query, column, query.GetGroupByClause().FillValue)
		if err != nil {
			return common.NewQueryError(common.InvalidArgument, fmt.Sprintf("%s", err))
		}
		self.aggregators = append(self.aggregators, built)
	}

	tsAggregator, err := NewTimestampAggregator(query, nil)
	if err != nil {
		return err
	}
	self.timestampAggregator = tsAggregator
	self.groups = make(map[string]map[Group]bool)
	self.pointsRange = make(map[string]*PointRange)
	self.groupBy = query.GetGroupByClause()

	self.initializeFields()

	// Invoked by distributeQuery for each series it produces.
	aggregate := func(series *protocol.Series) error {
		if len(series.Points) == 0 {
			return nil
		}

		// Without group by time(), or when the group by clause has
		// FillWithZero set, the whole series is aggregated as-is and no
		// per-bucket summaries are calculated here.
		if self.duration == nil || query.GetGroupByClause().FillWithZero {
			return self.aggregateValuesForSeries(series)
		}

		// Otherwise cut the series into runs of consecutive points that
		// share a time bucket. `open` is the run still being filled;
		// `closed` holds the runs that ended because a point with a
		// different bucket followed them.
		first := series.Points[0]
		openBucket := self.getTimestampFromPoint(first)
		open := &protocol.Series{
			Name:   series.Name,
			Fields: series.Fields,
			Points: []*protocol.Point{first},
		}
		var closed []*protocol.Series
		for _, point := range series.Points[1:] {
			pointBucket := self.getTimestampFromPoint(point)
			if pointBucket != openBucket {
				closed = append(closed, open)
				open = &protocol.Series{Name: series.Name, Fields: series.Fields}
				openBucket = pointBucket
			}
			open.Points = append(open.Points, point)
		}

		// Every closed run is complete within this series: aggregate it and
		// calculate the summaries for its table straight away.
		for _, run := range closed {
			if err := self.aggregateValuesForSeries(run); err != nil {
				return err
			}
			self.calculateSummariesForTable(*run.Name)
		}

		// The trailing run may continue in the next series handed to this
		// callback, so it is only aggregated. Summaries are calculated first
		// if the bucket remembered for this series name differs from the
		// trailing run's bucket.
		tailBucket := self.getTimestampFromPoint(open.Points[0])
		previousBucket, seen := self.buckets[*series.Name]
		if seen && previousBucket != tailBucket {
			self.calculateSummariesForTable(*open.Name)
		}

		self.buckets[*series.Name] = tailBucket
		return self.aggregateValuesForSeries(open)
	}

	return self.distributeQuery(query, aggregate)
}