Beispiel #1
0
// QueryResponseBufferSize estimates how many responses should be buffered
// when this shard is queried with querySpec.
//
// When the query has no group by interval a fixed size of 1000 is returned.
// Otherwise the estimate starts from the number of group by intervals
// ("ticks") that fit into the shard's duration:
//
//   - fewer than 10 ticks is raised to 100;
//   - more than 1000 ticks is divided by batchPointSize, but the result is
//     never allowed to drop below 1000;
//   - if the query groups by more than one column the result is multiplied
//     by 100, since the cardinality of those columns is unknown up front.
//
// A batchPointSize of zero used to trigger a division-by-zero panic. It is
// now handled like a negative batchPointSize already was: the quotient is
// discarded and the 1000 floor applies.
//
// NOTE(review): a group by interval of exactly zero would still divide by
// zero below; presumably the parser rejects such intervals - confirm.
func (self *ShardData) QueryResponseBufferSize(querySpec *parser.QuerySpec, batchPointSize int) int {
	groupByTime := querySpec.GetGroupByInterval()
	if groupByTime == nil {
		// If the group by time is nil, we shouldn't have to use a buffer since the shards should be queried sequentially.
		// However, set this to something high just to be safe.
		log.Debug("BUFFER SIZE: 1000")
		return 1000
	}

	tickCount := int(self.shardNanoseconds / uint64(*groupByTime))
	if tickCount < 10 {
		tickCount = 100
	} else if tickCount > 1000 {
		// cap this because each response should have up to this number of points in it.
		if batchPointSize > 0 {
			tickCount = tickCount / batchPointSize
		} else {
			// Non-positive batch sizes cannot produce a meaningful quotient;
			// fall through to the floor below instead of dividing.
			tickCount = 0
		}

		// but make sure it's at least 1k
		if tickCount < 1000 {
			tickCount = 1000
		}
	}

	if querySpec.GetGroupByColumnCount() > 1 {
		// we don't really know the cardinality for any column up front. This is just a multiplier so we'll see how this goes.
		// each response can have many points, so having a buffer of the ticks * 100 should be safe, but we'll see.
		tickCount = tickCount * 100
	}
	log.Debug("BUFFER SIZE: %d", tickCount)
	return tickCount
}
Beispiel #2
0
// getShardsAndProcessor looks up the shards that querySpec touches and
// decides which processor the coordinator should write results into.
//
//   - Non-select queries get writer back untouched.
//   - Select queries that the shards aggregate locally get writer wrapped in
//     a limit-aware passthrough engine.
//   - All other select queries get writer wrapped in a query engine so the
//     aggregation happens in the coordinator. If creating that engine fails
//     the error is logged and returned together with whatever the engine
//     constructor returned.
func (self *Coordinator) getShardsAndProcessor(querySpec *parser.QuerySpec, writer engine.Processor) ([]*cluster.ShardData, engine.Processor, error) {
	shards, err := self.clusterConfiguration.GetShardsForQuery(querySpec)
	if err != nil {
		return nil, nil, err
	}
	aggregateLocally := shards.ShouldAggregateLocally(querySpec)

	selectQuery := querySpec.SelectQuery()
	switch {
	case selectQuery == nil:
		return shards, writer, nil
	case aggregateLocally:
		// if we have a query with limit, then create an engine, or we can
		// make the passthrough limit aware
		writer = engine.NewPassthroughEngineWithLimit(writer, 100, selectQuery.Limit)
		return shards, writer, nil
	}

	// Aggregation isn't happening at the shard level, so the coordinator
	// needs its own engine fed with the ids of all participating shards.
	ids := make([]uint32, 0, len(shards))
	for _, shard := range shards {
		ids = append(ids, shard.Id())
	}
	writer, err = engine.NewQueryEngine(writer, selectQuery, ids)
	if err != nil {
		log.Error(err)
		log.Debug("Coordinator processor chain: %s", engine.ProcessorChain(writer))
	}
	return shards, writer, err
}
// getShardsToMatchQuery collects the distinct shards of every shard space
// that matches a series referenced by querySpec, sorted by time descending.
//
// If the query's from clause is a regex, the series names are resolved
// through the meta store. An error is returned as soon as a series has no
// known fields. Series that match no shard space are skipped. The shard lock
// is held for reading for the duration of the call.
func (self *ClusterConfiguration) getShardsToMatchQuery(querySpec *parser.QuerySpec) ([]*ShardData, error) {
	self.shardLock.RLock()
	defer self.shardLock.RUnlock()

	names, regex := querySpec.TableNamesAndRegex()
	database := querySpec.Database()
	if regex != nil {
		names = self.MetaStore.GetSeriesForDatabaseAndRegex(database, regex)
	}

	// Keyed by shard id so a shard shared by several series is only
	// returned once.
	byID := make(map[uint32]*ShardData)
	for _, series := range names {
		fields := self.MetaStore.GetFieldsForSeries(database, series)
		if len(fields) == 0 {
			return nil, fmt.Errorf("Couldn't find series: %s", series)
		}
		if space := self.getShardSpaceToMatchSeriesName(database, series); space != nil {
			for _, shard := range space.shards {
				byID[shard.id] = shard
			}
		}
	}

	matched := make([]*ShardData, 0, len(byID))
	for _, shard := range byID {
		matched = append(matched, shard)
	}
	SortShardsByTimeDescending(matched)
	return matched, nil
}
Beispiel #4
0
// CheckQueryPermissions dispatches the authorization check for querySpec
// based on its query type: delete queries go to AuthorizeDeleteQuery, select
// queries go to AuthorizeSelectQuery, and every other type is allowed without
// further checks.
func (self *Permissions) CheckQueryPermissions(user common.User, db string, querySpec *parser.QuerySpec) (ok bool, err common.AuthorizationError) {
	queryType := querySpec.Query().Type()
	if queryType == parser.Delete {
		return self.AuthorizeDeleteQuery(user, db)
	}
	if queryType == parser.Select {
		return self.AuthorizeSelectQuery(user, db, querySpec)
	}
	return true, ""
}
Beispiel #5
0
// hasReadAccess reports whether querySpec grants read access to every
// non-regex series it references. Regex entries are skipped here.
func (self *Shard) hasReadAccess(querySpec *parser.QuerySpec) bool {
	for series := range querySpec.SeriesValuesAndColumns() {
		_, isRegex := series.GetCompiledRegex()
		if isRegex {
			continue
		}
		if !querySpec.HasReadAccess(series.Name) {
			return false
		}
	}
	return true
}
Beispiel #6
0
// expandRegex hands the select query of spec to parser.RewriteMergeQuery
// together with a callback that resolves a regex to the series names the
// meta store knows for the query's database. Non-select queries are left
// untouched.
func (self *Coordinator) expandRegex(spec *parser.QuerySpec) {
	selectQuery := spec.SelectQuery()
	if selectQuery != nil {
		parser.RewriteMergeQuery(selectQuery, func(re *regexp.Regexp) []string {
			return self.clusterConfiguration.MetaStore.GetSeriesForDatabaseAndRegex(spec.Database(), re)
		})
	}
}
// GetShardsForQuery returns the shards querySpec has to be run against.
//
// The matching shards are narrowed down with getShardRange and, for
// ascending queries, re-sorted by time ascending. Any error from matching the
// shards is returned unchanged.
func (self *ClusterConfiguration) GetShardsForQuery(querySpec *parser.QuerySpec) (Shards, error) {
	matching, err := self.getShardsToMatchQuery(querySpec)
	if err != nil {
		return nil, err
	}

	// Logged before narrowing, so this is the number of matching shards, not
	// the number that ends up being queried.
	log.Debug("Querying %d shards for query", len(matching))

	matching = self.getShardRange(querySpec, matching)
	if querySpec.IsAscending() {
		SortShardsByTimeAscending(matching)
	}
	return matching, nil
}
Beispiel #8
0
// executeQueryForSeries streams the points of a single series to processor
// in batches of self.pointBatchSize points.
//
// Single point queries are delegated to executeSinglePointQuery. For all
// other queries the points are read from a point iterator; every full batch
// is yielded to the processor, and whatever is left once the iterator is
// exhausted is yielded as a final (possibly empty) batch. Processing stops
// early, without error, when the processor signals it does not want more
// data. Errors from the iterator or the processor are returned.
func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, name string, columns []string, processor engine.Processor) error {
	if querySpec.IsSinglePointQuery() {
		log.Debug("Running single query for series %s", name)
		return self.executeSinglePointQuery(querySpec, name, columns, processor)
	}

	fieldNames, iter, err := self.getPointIteratorForSeries(querySpec, name, columns)
	if err != nil {
		return err
	}
	defer iter.Close()

	query := querySpec.SelectQuery()
	aliases := query.GetTableAliases(name)

	// newBatch creates an empty outgoing series with room for one full batch.
	newBatch := func() *protocol.Series {
		return &protocol.Series{
			Name:   protocol.String(name),
			Fields: fieldNames,
			Points: make([]*protocol.Point, 0, self.pointBatchSize),
		}
	}

	batch := newBatch()
	for ; iter.Valid(); iter.Next() {
		batch.Points = append(batch.Points, iter.Point())
		if len(batch.Points) < self.pointBatchSize {
			continue
		}

		ok, err := yieldToProcessor(batch, processor, aliases)
		if err != nil || !ok {
			log.Debug("Stopping processing.")
			if err != nil {
				log.Error("Error while processing data: %v", err)
				return err
			}
			return nil
		}
		batch = newBatch()
	}

	if err := iter.Error(); err != nil {
		return err
	}

	// Yield remaining data
	if ok, err := yieldToProcessor(batch, processor, aliases); err != nil || !ok {
		log.Debug("Stopping processing remaining points...")
		if err != nil {
			log.Error("Error while processing data: %v", err)
			return err
		}
	}

	log.Debug("Finished running query %s", query.GetQueryString())
	return nil
}
Beispiel #9
0
// AuthorizeSelectQuery checks that user has read access to every series
// that is named literally in the from clause of the select query.
//
// Regex entries cannot be checked by name at this point and are skipped
// (presumably the series they expand to are checked where the regex is
// resolved - confirm against the shard-level access checks). Skipping a
// regex entry no longer aborts the loop, so literal names listed after a
// regex are still verified.
//
// It returns (true, "") when all literal names are readable, otherwise false
// together with an authorization error naming the first unreadable series.
func (self *Permissions) AuthorizeSelectQuery(user common.User, db string, querySpec *parser.QuerySpec) (ok bool, err common.AuthorizationError) {
	fromClause := querySpec.SelectQuery().GetFromClause()

	for _, n := range fromClause.Names {
		if _, isRegex := n.Name.GetCompiledRegex(); isRegex {
			// Keep going: later entries may be literal names that still need
			// to be checked.
			continue
		}
		if name := n.Name.Name; !user.HasReadAccess(name) {
			return false, common.NewAuthorizationError("User doesn't have read access to %s", name)
		}
	}
	return true, ""
}
Beispiel #10
0
// executeSinglePointQuery looks up the one point identified by the query's
// start time and sequence number and yields it to p as a series with a
// single point.
//
// Each requested field is read from storage by key. Fields with no stored
// value are left out of both the field names and the point's values. The
// series is yielded even if no field had a value, in which case the point
// carries only the timestamp and sequence number.
//
// Errors from the field lookup, the sequence number lookup, the storage read,
// the protobuf decode and the processor are returned.
func (self *Shard) executeSinglePointQuery(querySpec *parser.QuerySpec, name string, columns []string, p engine.Processor) error {
	fields, err := self.getFieldsForSeries(querySpec.Database(), name, columns)
	if err != nil {
		log.Error("Error looking up fields for %s: %s", name, err)
		return err
	}

	query := querySpec.SelectQuery()
	fieldCount := len(fields)
	fieldNames := make([]string, 0, fieldCount)
	point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
	timestamp := common.TimeToMicroseconds(query.GetStartTime())
	sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
	if err != nil {
		return err
	}

	// set the timestamp and sequence number
	point.SequenceNumber = &sequenceNumber
	point.SetTimestampInMicroseconds(timestamp)

	for _, field := range fields {
		sk := newStorageKey(field.Id, timestamp, sequenceNumber)
		data, err := self.db.Get(sk.bytes())
		if err != nil {
			return err
		}

		// No value stored for this field at the requested key.
		if data == nil {
			continue
		}

		fieldValue := &protocol.FieldValue{}
		err = proto.Unmarshal(data, fieldValue)
		if err != nil {
			return err
		}
		fieldNames = append(fieldNames, field.Name)
		point.Values = append(point.Values, fieldValue)
	}

	// The result always holds exactly one point, so it is yielded
	// unconditionally; the former len(result.Points) > 0 guard could never be
	// false.
	result := &protocol.Series{Name: &name, Fields: fieldNames, Points: []*protocol.Point{point}}
	_, yieldErr := p.Yield(result)
	return yieldErr
}
Beispiel #11
0
// getProcessor builds the processor chain this shard should write query
// results into, wrapping the given processor as needed.
//
//   - Delete and drop series queries get a NilProcessor.
//   - Any query type other than select, delete or drop series panics.
//   - Single point queries get a passthrough engine.
//   - Select queries that can be aggregated locally get a query engine.
//   - Select queries with aggregates that cannot be aggregated locally get an
//     unlimited passthrough engine, so the coordinator sees all the data.
//   - Remaining select queries with a limit get a limit-aware passthrough
//     engine; without a limit the processor is used as is.
//
// Except for inner joins, the result for a (non single point) select query
// is finally wrapped in a filtering engine.
func (self *ShardData) getProcessor(querySpec *parser.QuerySpec, processor engine.Processor) (engine.Processor, error) {
	qt := querySpec.Query().Type()
	if qt == parser.Delete || qt == parser.DropSeries {
		return NilProcessor{}, nil
	}
	if qt != parser.Select {
		panic(fmt.Errorf("Unexpected query type: %s", qt))
	}

	if querySpec.IsSinglePointQuery() {
		return engine.NewPassthroughEngine(processor, 1), nil
	}

	query := querySpec.SelectQuery()

	switch {
	case self.ShouldAggregateLocally(querySpec):
		// We should aggregate at the shard level
		log.Debug("creating a query engine")
		var err error
		processor, err = engine.NewQueryEngine(processor, query, nil)
		if err != nil {
			return nil, err
		}

	case query.HasAggregates():
		// we shouldn't limit the queries if they have aggregates and aren't
		// aggregated locally, otherwise the aggregation result which happens
		// in the coordinator will get partial data and will be incorrect
		log.Debug("creating a passthrough engine")
		processor = engine.NewPassthroughEngine(processor, 1000)

	case query.Limit > 0:
		// This is an optimization so we don't send more data than we should
		// over the wire. The coordinator has its own Passthrough which does
		// the final limit.
		log.Debug("creating a passthrough engine with limit")
		processor = engine.NewPassthroughEngineWithLimit(processor, 1000, query.Limit)
	}

	// Joins do their own filtering since we need to get all points before
	// filtering. Some where expressions are difficult to compute before the
	// points are joined together, e.g. a where clause with
	// left.column = 'something' or right.column = 'something_else'; the
	// individual series cannot be filtered separately in that case.
	filterQuery := querySpec.SelectQuery()
	if filterQuery != nil && filterQuery.GetFromClause().Type != parser.FromClauseInnerJoin {
		processor = engine.NewFilteringEngine(filterQuery, processor)
	}
	return processor, nil
}
Beispiel #12
0
// runDeleteQuery creates a checkpoint in the cluster configuration, marks
// querySpec to run against all servers in each shard and then runs it like
// any other query spec. A failure to create the checkpoint aborts the delete.
func (self *Coordinator) runDeleteQuery(querySpec *parser.QuerySpec, p engine.Processor) error {
	err := self.clusterConfiguration.CreateCheckpoint()
	if err != nil {
		return err
	}

	querySpec.RunAgainstAllServersInShard = true
	return self.runQuerySpec(querySpec, p)
}
Beispiel #13
0
// executeArrayQuery runs the select query once for every series referenced
// by it, stopping at the first error.
//
// A regex entry is expanded through the meta store; series the query spec
// has no read access to are silently skipped. Literal entries are queried
// by their name directly.
func (self *Shard) executeArrayQuery(querySpec *parser.QuerySpec, processor engine.Processor) error {
	for series, columns := range querySpec.SelectQuery().GetReferencedColumns() {
		regex, isRegex := series.GetCompiledRegex()
		if !isRegex {
			if err := self.executeQueryForSeries(querySpec, series.Name, columns, processor); err != nil {
				return err
			}
			continue
		}

		for _, name := range self.metaStore.GetSeriesForDatabaseAndRegex(querySpec.Database(), regex) {
			if !querySpec.HasReadAccess(name) {
				continue
			}
			if err := self.executeQueryForSeries(querySpec, name, columns, processor); err != nil {
				return err
			}
		}
	}

	return nil
}
Beispiel #14
0
// Query runs querySpec against this shard and writes results to processor.
//
// The close lock is held for reading while the query runs. An error is
// returned if the shard is closed, if a list series query reaches the shard,
// or if the query spec lacks read access to one of the literally named
// series. Delete queries are delegated to executeDeleteQuery; select queries
// are dispatched on the type of their from clause. An unknown from clause
// type panics.
func (self *Shard) Query(querySpec *parser.QuerySpec, processor engine.Processor) error {
	self.closeLock.RLock()
	defer self.closeLock.RUnlock()

	if self.closed {
		return fmt.Errorf("Shard is closed")
	}
	if querySpec.IsListSeriesQuery() {
		return fmt.Errorf("List series queries should never come to the shard")
	}
	if querySpec.IsDeleteFromSeriesQuery() {
		return self.executeDeleteQuery(querySpec, processor)
	}
	if !self.hasReadAccess(querySpec) {
		return errors.New("User does not have access to one or more of the series requested.")
	}

	fromType := querySpec.SelectQuery().FromClause.Type
	if fromType == parser.FromClauseArray {
		log.Debug("Shard %s: running a regular query", self.db.Path())
		return self.executeArrayQuery(querySpec, processor)
	}
	if fromType == parser.FromClauseMerge || fromType == parser.FromClauseInnerJoin {
		log.Debug("Shard %s: running a merge query", self.db.Path())
		return self.executeMergeQuery(querySpec, processor, fromType)
	}
	panic(fmt.Errorf("Unknown from clause type %s", fromType))
}
Beispiel #15
0
// executeMergeQuery runs a merge or inner join query: it opens one point
// iterator per referenced series, wraps each in a stream named after the
// series' alias, and feeds all streams through a merger into processor.
//
// All iterators are closed when the function returns. A series that is
// referenced through more than one alias is rejected with an error. The last
// argument passed to the merger is true for merge queries and false
// otherwise.
func (self *Shard) executeMergeQuery(querySpec *parser.QuerySpec, processor engine.Processor, t parser.FromClauseType) error {
	referenced := querySpec.SelectQuery().GetReferencedColumns()
	iterators := make([]*PointIterator, len(referenced))
	streams := make([]engine.StreamQuery, len(iterators))

	idx := 0
	for series, cols := range referenced {
		fields, iter, err := self.getPointIteratorForSeries(querySpec, series.Name, cols)
		iterators[idx] = iter
		if err != nil {
			log.Error(err)
			return err
		}
		defer iter.Close()

		aliases := querySpec.SelectQuery().GetTableAliases(series.Name)
		if len(aliases) > 1 {
			return fmt.Errorf("Cannot have the same table joined more than once")
		}
		streams[idx] = PointIteratorStream{
			pi:     iter,
			name:   aliases[0],
			fields: fields,
		}
		idx++
	}

	seriesHeap := engine.NewSeriesHeap(querySpec.IsAscending())
	isMerge := t == parser.FromClauseMerge
	merger := engine.NewCME("Shard", streams, seriesHeap, processor, isMerge)
	if _, err := merger.Update(); err != nil {
		return err
	}
	return nil
}
Beispiel #16
0
// executeDeleteQuery deletes the query's time range from every series named
// in the delete query's from clause and then compacts the database.
//
// Only plain (array) from clauses are accepted; merges and inner joins are
// rejected with an error. Regex entries are deleted through
// deleteRangeOfRegex, literal names through deleteRangeOfSeries. The first
// failing deletion aborts the query before compaction. The processor
// argument is not used.
func (self *Shard) executeDeleteQuery(querySpec *parser.QuerySpec, processor engine.Processor) error {
	query := querySpec.DeleteQuery()
	from := query.GetFromClause()
	database := querySpec.Database()
	if from.Type != parser.FromClauseArray {
		return fmt.Errorf("Merge and Inner joins can't be used with a delete query: %v", from.Type)
	}

	for _, table := range from.Names {
		regex, isRegex := table.Name.GetCompiledRegex()

		var err error
		switch {
		case isRegex:
			err = self.deleteRangeOfRegex(database, regex, query.GetStartTime(), query.GetEndTime())
		default:
			err = self.deleteRangeOfSeries(database, table.Name.Name, query.GetStartTime(), query.GetEndTime())
		}
		if err != nil {
			return err
		}
	}

	self.db.Compact()
	return nil
}
Beispiel #17
0
// shouldQuerySequentially decides whether the shards have to be queried one
// after the other (true) or may be queried in parallel (false).
//
// Non-select queries are never forced to be sequential. Select queries are
// queried in parallel only when the number of responses is predictable and
// small enough: a single non-regex series, grouped by time only, aggregated
// locally on the shards, and with every shard's estimated response buffer
// within the configured maximum.
func (self *Coordinator) shouldQuerySequentially(shards cluster.Shards, querySpec *parser.QuerySpec) bool {
	// if the query isn't a select, then it doesn't matter
	if querySpec.SelectQuery() == nil {
		return false
	}

	// if the query is a regex, we can't predict the number of responses
	// we get back
	if querySpec.IsRegex() {
		return true
	}

	groupBy := querySpec.SelectQuery().GetGroupByClause()
	switch {
	case groupBy == nil:
		// no group by clause: we're returning raw points with some math
		// done on them, thus we can't predict the number of points
		return true
	case querySpec.GetGroupByInterval() == nil:
		// a group by clause without an interval: we can't predict the
		// cardinality of the columns used in the group by, thus we can't
		// predict the number of responses returned from the shard
		return true
	case len(groupBy.Elems) > 1:
		// group by time and other columns: the previous logic holds
		return true
	case !shards.ShouldAggregateLocally(querySpec):
		return true
	}

	// if the number of responses of any shard is too big, do sequential
	// querying
	for _, shard := range shards {
		if shard.QueryResponseBufferSize(querySpec, self.config.StoragePointBatchSize) > self.config.ClusterMaxResponseBufferSize {
			return true
		}
	}

	return false
}
Beispiel #18
0
// runDropSeriesQuery drops the series named by the drop series query through
// the raft server, after checking that the query's user is authorized to do
// so. The authorization error or the raft server's error is returned.
func (self *Coordinator) runDropSeriesQuery(querySpec *parser.QuerySpec) error {
	user, db := querySpec.User(), querySpec.Database()
	series := querySpec.Query().DropSeriesQuery.GetTableName()

	ok, authErr := self.permissions.AuthorizeDropSeries(user, db, series)
	if !ok {
		return authErr
	}

	if err := self.raftServer.DropSeries(db, series); err != nil {
		return err
	}
	return nil
}
Beispiel #19
0
// runListSeriesQuery yields a single series named "list_series_result" that
// has one point per series of the query's database.
//
// If the list series query carries a regex, only series whose name matches
// it are listed. Each point holds the series name; when the query asks for
// spaces it additionally holds the name of the first shard space matching
// the series, or an empty string if none matches.
func (self *Coordinator) runListSeriesQuery(querySpec *parser.QuerySpec, p engine.Processor) error {
	allSeries := self.clusterConfiguration.MetaStore.GetSeriesForDatabase(querySpec.Database())
	listQuery := querySpec.Query().GetListSeriesQuery()

	selected := allSeries
	if listQuery.HasRegex() {
		pattern := listQuery.GetRegex()
		selected = nil
		for _, series := range allSeries {
			if pattern.MatchString(series) {
				selected = append(selected, series)
			}
		}
	}

	var fields []string
	points := make([]*protocol.Point, len(selected))

	if !listQuery.IncludeSpaces {
		fields = []string{"name"}
		for i, series := range selected {
			points[i] = &protocol.Point{Values: []*protocol.FieldValue{
				{StringValue: proto.String(series)},
			}}
		}
	} else {
		fields = []string{"name", "space"}
		spaces := self.clusterConfiguration.GetShardSpacesForDatabase(querySpec.Database())

		for i, series := range selected {
			// The first matching space wins; series without one get "".
			spaceName := ""
			for _, space := range spaces {
				if space.MatchesSeries(series) {
					spaceName = space.Name
					break
				}
			}
			points[i] = &protocol.Point{Values: []*protocol.FieldValue{
				{StringValue: proto.String(series)},
				{StringValue: proto.String(spaceName)},
			}}
		}
	}

	resultName := "list_series_result"
	_, err := p.Yield(&protocol.Series{Name: &resultName, Fields: fields, Points: points})
	return err
}
Beispiel #20
0
// ShouldAggregateLocally reports whether the data for querySpec can be
// aggregated locally on this shard.
//
// Joins and merges are never aggregated locally. Without a group by interval
// the answer is true exactly when the query has no aggregates. With a group
// by interval the shard duration must be a multiple of the interval and the
// query must not group by an irregular interval.
func (self *ShardData) ShouldAggregateLocally(querySpec *parser.QuerySpec) bool {
	if from := querySpec.GetFromClause(); from != nil {
		switch from.Type {
		case parser.FromClauseInnerJoin, parser.FromClauseMerge:
			return false
		}
	}

	interval := querySpec.GetGroupByInterval()
	if interval == nil {
		return !querySpec.HasAggregates()
	}

	if self.shardDuration%*interval != 0 {
		return false
	}
	return !querySpec.GroupByIrregularInterval
}
Beispiel #21
0
// createRequest builds the QUERY request for this shard from querySpec: the
// query string including its time condition, the user's name, the database,
// and a flag that is true when the user is not a cluster admin.
func (self *ShardData) createRequest(querySpec *parser.QuerySpec) *p.Request {
	query := querySpec.GetQueryStringWithTimeCondition()
	user := querySpec.User()
	name := user.GetName()
	db := querySpec.Database()
	dbUser := !user.IsClusterAdmin()

	request := &p.Request{}
	request.Type = p.Request_QUERY.Enum()
	request.ShardId = &self.id
	request.Query = &query
	request.UserName = &name
	request.Database = &db
	request.IsDbUser = &dbUser
	return request
}
Beispiel #22
0
// logAndHandleDeleteQuery builds the request for a delete query, sets its
// query string to the one including the time condition, and passes it to
// LogAndHandleDestructiveQuery with the final argument set to false.
func (self *ShardData) logAndHandleDeleteQuery(querySpec *parser.QuerySpec, response chan<- *p.Response) {
	query := querySpec.GetQueryStringWithTimeCondition()

	req := self.createRequest(querySpec)
	req.Query = &query

	self.LogAndHandleDestructiveQuery(querySpec, req, response, false)
}
Beispiel #23
0
// getPointIteratorForSeries creates a point iterator over the given columns
// of the series called name, bounded by the query's start and end time and
// following the select query's sort direction.
//
// It returns the names of the fields that were resolved for the series, in
// field order, together with the iterator. If the fields cannot be looked up
// the error is logged and returned.
func (self *Shard) getPointIteratorForSeries(querySpec *parser.QuerySpec, name string, columns []string) ([]string, *PointIterator, error) {
	fields, err := self.getFieldsForSeries(querySpec.Database(), name, columns)
	if err != nil {
		log.Error("Error looking up fields for %s: %s", name, err)
		return nil, nil, err
	}

	start, end := querySpec.GetStartTime(), querySpec.GetEndTime()
	ascending := querySpec.SelectQuery().Ascending

	fieldIterators := self.getIterators(fields, start, end, ascending)
	pointIterator := NewPointIterator(fieldIterators, fields, querySpec.GetStartTime(), querySpec.GetEndTime(), ascending)

	names := make([]string, 0, len(fields))
	for _, field := range fields {
		names = append(names, field.Name)
	}

	return names, pointIterator, nil
}
Beispiel #24
0
// Query runs querySpec against this shard and reports the outcome on the
// response channel.
//
// If the shard is local, a processor chain ending in the response channel is
// built and the query is run against the shard obtained from the store. On
// success the processor is closed and an END_STREAM response is sent; on
// failure an ERROR response carrying the error message is sent instead. If
// the shard is not local, the request is forwarded to a server returned by
// randomHealthyServer. If there is no such server an ERROR response is sent.
//
// The deferred common.RecoverFunc call is given a callback that sends an
// ERROR response (presumably invoked when a panic is recovered - confirm
// against common.RecoverFunc).
func (self *ShardData) Query(querySpec *parser.QuerySpec, response chan<- *p.Response) {
	log.Debug("QUERY: shard %d, query '%s'", self.Id(), querySpec.GetQueryStringWithTimeCondition())
	defer common.RecoverFunc(querySpec.Database(), querySpec.GetQueryStringWithTimeCondition(), func(err interface{}) {
		response <- &p.Response{
			Type:         p.Response_ERROR.Enum(),
			ErrorMessage: p.String(fmt.Sprintf("%s", err)),
		}
	})

	// This is only for queries that are deletes or drops. They need to be sent everywhere as opposed to just the local or one of the remote shards.
	// But this boolean should only be set to true on the server that receives the initial query.
	// NOTE(review): neither branch returns, so execution continues with the
	// local/remote handling below - confirm this is intended.
	if querySpec.RunAgainstAllServersInShard {
		if querySpec.IsDeleteFromSeriesQuery() {
			self.logAndHandleDeleteQuery(querySpec, response)
		} else if querySpec.IsDropSeriesQuery() {
			self.logAndHandleDropSeriesQuery(querySpec, response)
		}
	}

	if self.IsLocal {
		// Innermost processor: writes results to the response channel.
		var processor engine.Processor = NewResponseChannelProcessor(NewResponseChannelWrapper(response))
		var err error

		// Wrap it with a processor constructed from this shard's id.
		processor = NewShardIdInserterProcessor(self.Id(), processor)

		// Add the query-type specific engines on top.
		processor, err = self.getProcessor(querySpec, processor)
		if err != nil {
			response <- &p.Response{
				Type:         p.Response_ERROR.Enum(),
				ErrorMessage: p.String(err.Error()),
			}
			log.Error("Error while creating engine: %s", err)
			return
		}
		shard, err := self.store.GetOrCreateShard(self.id)
		if err != nil {
			response <- &p.Response{
				Type:         p.Response_ERROR.Enum(),
				ErrorMessage: p.String(err.Error()),
			}
			log.Error("Error while getting shards: %s", err)
			return
		}
		// Hand the shard back to the store once the query is done.
		defer self.store.ReturnShard(self.id)

		log.Debug("Processor chain:  %s\n", engine.ProcessorChain(processor))

		err = shard.Query(querySpec, processor)
		// if we call Close() in case of an error it will mask the error
		if err != nil {
			response <- &p.Response{
				Type:         p.Response_ERROR.Enum(),
				ErrorMessage: p.String(err.Error()),
			}
			return
		}
		processor.Close()
		response <- &p.Response{Type: p.Response_END_STREAM.Enum()}
		return
	}

	// Not local: forward the query to a healthy server, if there is one.
	if server := self.randomHealthyServer(); server != nil {
		log.Debug("Querying server %d for shard %d", server.GetId(), self.Id())
		request := self.createRequest(querySpec)
		server.MakeRequest(request, response)
		return
	}

	// No server is available for this shard: report the failure.
	message := fmt.Sprintf("No servers up to query shard %d", self.id)
	response <- &p.Response{
		Type:         p.Response_ERROR.Enum(),
		ErrorMessage: &message,
	}
	log.Error(message)
}