func (self *Coordinator) getShardsAndProcessor(querySpec *parser.QuerySpec, writer engine.Processor) ([]*cluster.ShardData, engine.Processor, error) { shards, err := self.clusterConfiguration.GetShardsForQuery(querySpec) if err != nil { return nil, nil, err } shouldAggregateLocally := shards.ShouldAggregateLocally(querySpec) q := querySpec.SelectQuery() if q == nil { return shards, writer, nil } if !shouldAggregateLocally { // if we should aggregate in the coordinator (i.e. aggregation // isn't happening locally at the shard level), create an engine shardIds := make([]uint32, len(shards)) for i, s := range shards { shardIds[i] = s.Id() } writer, err = engine.NewQueryEngine(writer, q, shardIds) if err != nil { log.Error(err) log.Debug("Coordinator processor chain: %s", engine.ProcessorChain(writer)) } return shards, writer, err } // if we have a query with limit, then create an engine, or we can // make the passthrough limit aware writer = engine.NewPassthroughEngineWithLimit(writer, 100, q.Limit) return shards, writer, nil }
func (self *Shard) Query(querySpec *parser.QuerySpec, processor engine.Processor) error { self.closeLock.RLock() defer self.closeLock.RUnlock() if self.closed { return fmt.Errorf("Shard is closed") } if querySpec.IsListSeriesQuery() { return fmt.Errorf("List series queries should never come to the shard") } else if querySpec.IsDeleteFromSeriesQuery() { return self.executeDeleteQuery(querySpec, processor) } if !self.hasReadAccess(querySpec) { return errors.New("User does not have access to one or more of the series requested.") } switch t := querySpec.SelectQuery().FromClause.Type; t { case parser.FromClauseArray: log.Debug("Shard %s: running a regular query", self.db.Path()) return self.executeArrayQuery(querySpec, processor) case parser.FromClauseMerge, parser.FromClauseInnerJoin: log.Debug("Shard %s: running a merge query", self.db.Path()) return self.executeMergeQuery(querySpec, processor, t) default: panic(fmt.Errorf("Unknown from clause type %s", t)) } }
func (self *Shard) executeArrayQuery(querySpec *parser.QuerySpec, processor engine.Processor) error { seriesAndColumns := querySpec.SelectQuery().GetReferencedColumns() for series, columns := range seriesAndColumns { if regex, ok := series.GetCompiledRegex(); ok { seriesNames := self.metaStore.GetSeriesForDatabaseAndRegex(querySpec.Database(), regex) for _, name := range seriesNames { if !querySpec.HasReadAccess(name) { continue } err := self.executeQueryForSeries(querySpec, name, columns, processor) if err != nil { return err } } } else { err := self.executeQueryForSeries(querySpec, series.Name, columns, processor) if err != nil { return err } } } return nil }
func (self *Shard) executeMergeQuery(querySpec *parser.QuerySpec, processor engine.Processor, t parser.FromClauseType) error { seriesAndColumns := querySpec.SelectQuery().GetReferencedColumns() iterators := make([]*PointIterator, len(seriesAndColumns)) streams := make([]engine.StreamQuery, len(iterators)) i := 0 var err error for s, c := range seriesAndColumns { c, iterators[i], err = self.getPointIteratorForSeries(querySpec, s.Name, c) if err != nil { log.Error(err) return err } defer iterators[i].Close() aliases := querySpec.SelectQuery().GetTableAliases(s.Name) if len(aliases) > 1 { return fmt.Errorf("Cannot have the same table joined more than once") } streams[i] = PointIteratorStream{ pi: iterators[i], name: aliases[0], fields: c, } i++ } h := engine.NewSeriesHeap(querySpec.IsAscending()) merger := engine.NewCME("Shard", streams, h, processor, t == parser.FromClauseMerge) if _, err := merger.Update(); err != nil { return err } return nil }
func (self *ShardData) getProcessor(querySpec *parser.QuerySpec, processor engine.Processor) (engine.Processor, error) { switch qt := querySpec.Query().Type(); qt { case parser.Delete, parser.DropSeries: return NilProcessor{}, nil case parser.Select: // continue default: panic(fmt.Errorf("Unexpected query type: %s", qt)) } if querySpec.IsSinglePointQuery() { return engine.NewPassthroughEngine(processor, 1), nil } query := querySpec.SelectQuery() var err error // We should aggregate at the shard level if self.ShouldAggregateLocally(querySpec) { log.Debug("creating a query engine") processor, err = engine.NewQueryEngine(processor, query, nil) if err != nil { return nil, err } goto addFilter } // we shouldn't limit the queries if they have aggregates and aren't // aggregated locally, otherwise the aggregation result which happen // in the coordinator will get partial data and will be incorrect if query.HasAggregates() { log.Debug("creating a passthrough engine") processor = engine.NewPassthroughEngine(processor, 1000) goto addFilter } // This is an optimization so we don't send more data that we should // over the wire. The coordinator has its own Passthrough which does // the final limit. if l := query.Limit; l > 0 { log.Debug("creating a passthrough engine with limit") processor = engine.NewPassthroughEngineWithLimit(processor, 1000, query.Limit) } addFilter: if query := querySpec.SelectQuery(); query != nil && query.GetFromClause().Type != parser.FromClauseInnerJoin { // Joins do their own filtering since we need to get all // points before filtering. This is due to the fact that some // where expressions will be difficult to compute before the // points are joined together, think where clause with // left.column = 'something' or right.column = // 'something_else'. We can't filter the individual series // separately. The filtering happens in merge.go:55 processor = engine.NewFilteringEngine(query, processor) } return processor, nil }
func (self *Coordinator) expandRegex(spec *parser.QuerySpec) { q := spec.SelectQuery() if q == nil { return } f := func(r *regexp.Regexp) []string { return self.clusterConfiguration.MetaStore.GetSeriesForDatabaseAndRegex(spec.Database(), r) } parser.RewriteMergeQuery(q, f) }
func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, name string, columns []string, processor engine.Processor) error { if querySpec.IsSinglePointQuery() { log.Debug("Running single query for series %s", name) return self.executeSinglePointQuery(querySpec, name, columns, processor) } var pi *PointIterator var err error columns, pi, err = self.getPointIteratorForSeries(querySpec, name, columns) if err != nil { return err } defer pi.Close() query := querySpec.SelectQuery() aliases := query.GetTableAliases(name) seriesOutgoing := &protocol.Series{Name: protocol.String(name), Fields: columns, Points: make([]*protocol.Point, 0, self.pointBatchSize)} for pi.Valid() { p := pi.Point() seriesOutgoing.Points = append(seriesOutgoing.Points, p) if len(seriesOutgoing.Points) >= self.pointBatchSize { ok, err := yieldToProcessor(seriesOutgoing, processor, aliases) if !ok || err != nil { log.Debug("Stopping processing.") if err != nil { log.Error("Error while processing data: %v", err) return err } return nil } seriesOutgoing = &protocol.Series{Name: protocol.String(name), Fields: columns, Points: make([]*protocol.Point, 0, self.pointBatchSize)} } pi.Next() } if err := pi.Error(); err != nil { return err } //Yield remaining data if ok, err := yieldToProcessor(seriesOutgoing, processor, aliases); !ok || err != nil { log.Debug("Stopping processing remaining points...") if err != nil { log.Error("Error while processing data: %v", err) return err } } log.Debug("Finished running query %s", query.GetQueryString()) return nil }
func (self *Permissions) AuthorizeSelectQuery(user common.User, db string, querySpec *parser.QuerySpec) (ok bool, err common.AuthorizationError) { // if this isn't a regex query do the permission check here fromClause := querySpec.SelectQuery().GetFromClause() for _, n := range fromClause.Names { if _, ok := n.Name.GetCompiledRegex(); ok { break } else if name := n.Name.Name; !user.HasReadAccess(name) { return false, common.NewAuthorizationError("User doesn't have read access to %s", name) } } return true, "" }
func (self *Shard) executeSinglePointQuery(querySpec *parser.QuerySpec, name string, columns []string, p engine.Processor) error { fields, err := self.getFieldsForSeries(querySpec.Database(), name, columns) if err != nil { log.Error("Error looking up fields for %s: %s", name, err) return err } query := querySpec.SelectQuery() fieldCount := len(fields) fieldNames := make([]string, 0, fieldCount) point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)} timestamp := common.TimeToMicroseconds(query.GetStartTime()) sequenceNumber, err := query.GetSinglePointQuerySequenceNumber() if err != nil { return err } // set the timestamp and sequence number point.SequenceNumber = &sequenceNumber point.SetTimestampInMicroseconds(timestamp) for _, field := range fields { sk := newStorageKey(field.Id, timestamp, sequenceNumber) data, err := self.db.Get(sk.bytes()) if err != nil { return err } if data == nil { continue } fieldValue := &protocol.FieldValue{} err = proto.Unmarshal(data, fieldValue) if err != nil { return err } fieldNames = append(fieldNames, field.Name) point.Values = append(point.Values, fieldValue) } result := &protocol.Series{Name: &name, Fields: fieldNames, Points: []*protocol.Point{point}} if len(result.Points) > 0 { _, err := p.Yield(result) return err } return nil }
func (self *Coordinator) shouldQuerySequentially(shards cluster.Shards, querySpec *parser.QuerySpec) bool { // if the query isn't a select, then it doesn't matter if querySpec.SelectQuery() == nil { return false } // if the query is a regex, we can't predic the number of responses // we get back if querySpec.IsRegex() { return true } groupByClause := querySpec.SelectQuery().GetGroupByClause() // if there's no group by clause, then we're returning raw points // with some math done on them, thus we can't predict the number of // points if groupByClause == nil { return true } // if there's a group by clause but no group by interval, we can't // predict the cardinality of the columns used in the group by // interval, thus we can't predict the number of responses returned // from the shard if querySpec.GetGroupByInterval() == nil { return true } // if there's a group by time and other columns, then the previous // logic holds if len(groupByClause.Elems) > 1 { return true } if !shards.ShouldAggregateLocally(querySpec) { return true } for _, shard := range shards { bufferSize := shard.QueryResponseBufferSize(querySpec, self.config.StoragePointBatchSize) // if the number of repsonses is too big, do a sequential querying if bufferSize > self.config.ClusterMaxResponseBufferSize { return true } } // parallel querying only if we're querying a single series, with // group by time only return false }
func (self *Shard) getPointIteratorForSeries(querySpec *parser.QuerySpec, name string, columns []string) ([]string, *PointIterator, error) { fields, err := self.getFieldsForSeries(querySpec.Database(), name, columns) if err != nil { log.Error("Error looking up fields for %s: %s", name, err) return nil, nil, err } startTime := querySpec.GetStartTime() endTime := querySpec.GetEndTime() query := querySpec.SelectQuery() iterators := self.getIterators(fields, startTime, endTime, query.Ascending) pi := NewPointIterator(iterators, fields, querySpec.GetStartTime(), querySpec.GetEndTime(), query.Ascending) columns = make([]string, len(fields)) for i := range fields { columns[i] = fields[i].Name } return columns, pi, nil }