func (self *ShardData) QueryResponseBufferSize(querySpec *parser.QuerySpec, batchPointSize int) int { groupByTime := querySpec.GetGroupByInterval() if groupByTime == nil { // If the group by time is nil, we shouldn't have to use a buffer since the shards should be queried sequentially. // However, set this to something high just to be safe. log.Debug("BUFFER SIZE: 1000") return 1000 } tickCount := int(self.shardNanoseconds / uint64(*groupByTime)) if tickCount < 10 { tickCount = 100 } else if tickCount > 1000 { // cap this because each response should have up to this number of points in it. tickCount = tickCount / batchPointSize // but make sure it's at least 1k if tickCount < 1000 { tickCount = 1000 } } columnCount := querySpec.GetGroupByColumnCount() if columnCount > 1 { // we don't really know the cardinality for any column up front. This is a just a multiplier so we'll see how this goes. // each response can have many points, so having a buffer of the ticks * 100 should be safe, but we'll see. tickCount = tickCount * 100 } log.Debug("BUFFER SIZE: %d", tickCount) return tickCount }
// Returns true if we can aggregate the data locally per shard, // i.e. the group by interval lines up with the shard duration and // there are no joins or merges func (self *ShardData) ShouldAggregateLocally(querySpec *parser.QuerySpec) bool { f := querySpec.GetFromClause() if f != nil && (f.Type == parser.FromClauseInnerJoin || f.Type == parser.FromClauseMerge) { return false } groupByInterval := querySpec.GetGroupByInterval() if groupByInterval == nil { if querySpec.HasAggregates() { return false } return true } return (self.shardDuration%*groupByInterval == 0) && !querySpec.GroupByIrregularInterval }
func (self *Coordinator) shouldQuerySequentially(shards cluster.Shards, querySpec *parser.QuerySpec) bool { // if the query isn't a select, then it doesn't matter if querySpec.SelectQuery() == nil { return false } // if the query is a regex, we can't predic the number of responses // we get back if querySpec.IsRegex() { return true } groupByClause := querySpec.SelectQuery().GetGroupByClause() // if there's no group by clause, then we're returning raw points // with some math done on them, thus we can't predict the number of // points if groupByClause == nil { return true } // if there's a group by clause but no group by interval, we can't // predict the cardinality of the columns used in the group by // interval, thus we can't predict the number of responses returned // from the shard if querySpec.GetGroupByInterval() == nil { return true } // if there's a group by time and other columns, then the previous // logic holds if len(groupByClause.Elems) > 1 { return true } if !shards.ShouldAggregateLocally(querySpec) { return true } for _, shard := range shards { bufferSize := shard.QueryResponseBufferSize(querySpec, self.config.StoragePointBatchSize) // if the number of repsonses is too big, do a sequential querying if bufferSize > self.config.ClusterMaxResponseBufferSize { return true } } // parallel querying only if we're querying a single series, with // group by time only return false }