func (self *ShardData) getProcessor(querySpec *parser.QuerySpec, processor engine.Processor) (engine.Processor, error) { switch qt := querySpec.Query().Type(); qt { case parser.Delete, parser.DropSeries: return NilProcessor{}, nil case parser.Select: // continue default: panic(fmt.Errorf("Unexpected query type: %s", qt)) } if querySpec.IsSinglePointQuery() { return engine.NewPassthroughEngine(processor, 1), nil } query := querySpec.SelectQuery() var err error // We should aggregate at the shard level if self.ShouldAggregateLocally(querySpec) { log.Debug("creating a query engine") processor, err = engine.NewQueryEngine(processor, query, nil) if err != nil { return nil, err } goto addFilter } // we shouldn't limit the queries if they have aggregates and aren't // aggregated locally, otherwise the aggregation result which happen // in the coordinator will get partial data and will be incorrect if query.HasAggregates() { log.Debug("creating a passthrough engine") processor = engine.NewPassthroughEngine(processor, 1000) goto addFilter } // This is an optimization so we don't send more data that we should // over the wire. The coordinator has its own Passthrough which does // the final limit. if l := query.Limit; l > 0 { log.Debug("creating a passthrough engine with limit") processor = engine.NewPassthroughEngineWithLimit(processor, 1000, query.Limit) } addFilter: if query := querySpec.SelectQuery(); query != nil && query.GetFromClause().Type != parser.FromClauseInnerJoin { // Joins do their own filtering since we need to get all // points before filtering. This is due to the fact that some // where expressions will be difficult to compute before the // points are joined together, think where clause with // left.column = 'something' or right.column = // 'something_else'. We can't filter the individual series // separately. The filtering happens in merge.go:55 processor = engine.NewFilteringEngine(query, processor) } return processor, nil }
func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, seriesName string, columns []string, processor engine.Processor) error { fields, err := self.getFieldsForSeries(querySpec.Database(), seriesName, columns) if err != nil { log.Error("Error looking up fields for %s: %s", seriesName, err) return err } if querySpec.IsSinglePointQuery() { log.Debug("Running single query for series %s, fields %v", seriesName, fields) return self.executeSinglePointQuery(querySpec, seriesName, fields, processor) } startTime := querySpec.GetStartTime() endTime := querySpec.GetEndTime() query := querySpec.SelectQuery() aliases := query.GetTableAliases(seriesName) fieldNames, iterators := self.getIterators(fields, startTime, endTime, query.Ascending) seriesOutgoing := &protocol.Series{Name: protocol.String(seriesName), Fields: fieldNames, Points: make([]*protocol.Point, 0, self.pointBatchSize)} pi := NewPointIterator(iterators, fields, querySpec.GetStartTime(), querySpec.GetEndTime(), query.Ascending) defer pi.Close() for pi.Valid() { p := pi.Point() seriesOutgoing.Points = append(seriesOutgoing.Points, p) if len(seriesOutgoing.Points) >= self.pointBatchSize { ok, err := yieldToProcessor(seriesOutgoing, processor, aliases) if !ok || err != nil { log.Debug("Stopping processing.") if err != nil { log.Error("Error while processing data: %v", err) return err } } seriesOutgoing = &protocol.Series{Name: protocol.String(seriesName), Fields: fieldNames, Points: make([]*protocol.Point, 0, self.pointBatchSize)} } pi.Next() } if err := pi.Error(); err != nil { return err } //Yield remaining data if ok, err := yieldToProcessor(seriesOutgoing, processor, aliases); !ok || err != nil { log.Debug("Stopping processing remaining points...") if err != nil { log.Error("Error while processing data: %v", err) return err } } log.Debug("Finished running query %s", query.GetQueryString()) return nil }
func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, name string, columns []string, processor engine.Processor) error { if querySpec.IsSinglePointQuery() { log.Debug("Running single query for series %s", name) return self.executeSinglePointQuery(querySpec, name, columns, processor) } var pi *PointIterator var err error columns, pi, err = self.getPointIteratorForSeries(querySpec, name, columns) if err != nil { return err } defer pi.Close() query := querySpec.SelectQuery() aliases := query.GetTableAliases(name) seriesOutgoing := &protocol.Series{Name: protocol.String(name), Fields: columns, Points: make([]*protocol.Point, 0, self.pointBatchSize)} for pi.Valid() { p := pi.Point() seriesOutgoing.Points = append(seriesOutgoing.Points, p) if len(seriesOutgoing.Points) >= self.pointBatchSize { ok, err := yieldToProcessor(seriesOutgoing, processor, aliases) if !ok || err != nil { log.Debug("Stopping processing.") if err != nil { log.Error("Error while processing data: %v", err) return err } return nil } seriesOutgoing = &protocol.Series{Name: protocol.String(name), Fields: columns, Points: make([]*protocol.Point, 0, self.pointBatchSize)} } pi.Next() } if err := pi.Error(); err != nil { return err } //Yield remaining data if ok, err := yieldToProcessor(seriesOutgoing, processor, aliases); !ok || err != nil { log.Debug("Stopping processing remaining points...") if err != nil { log.Error("Error while processing data: %v", err) return err } } log.Debug("Finished running query %s", query.GetQueryString()) return nil }
// executeQueryForSeries reads the requested columns of seriesName between
// the query's start and end time and yields the resulting points to
// processor.
//
// Single point queries are answered through fetchSinglePoint and
// processor.YieldPoint. For every other query one iterator per field is
// opened and the per-field values are merged into points: in each round the
// values that share the chosen timestamp and sequence number form one
// point, and fields without a matching value are marked null. Points are
// batched (self.pointBatchSize) and yielded once per table alias via
// processor.YieldSeries.
//
// It returns the error from the field lookup, from fetchSinglePoint, from an
// iterator, or from unmarshalling a stored value; otherwise nil.
func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, seriesName string, columns []string, processor cluster.QueryProcessor) error {
	startTimeBytes := self.byteArrayForTime(querySpec.GetStartTime())
	endTimeBytes := self.byteArrayForTime(querySpec.GetEndTime())

	fields, err := self.getFieldsForSeries(querySpec.Database(), seriesName, columns)
	if err != nil {
		log.Error("Error looking up fields for %s: %s", seriesName, err)
		return err
	}

	fieldCount := len(fields)
	// rawColumnValues[i] holds the pending (read but not yet emitted) value
	// of field i; a nil value means the slot is empty.
	rawColumnValues := make([]rawColumnValue, fieldCount, fieldCount)
	query := querySpec.SelectQuery()

	aliases := query.GetTableAliases(seriesName)
	if querySpec.IsSinglePointQuery() {
		series, err := self.fetchSinglePoint(querySpec, seriesName, fields)
		if err != nil {
			log.Error("Error reading a single point: %s", err)
			return err
		}
		// Only the first point is yielded; an empty result yields nothing.
		if len(series.Points) > 0 {
			processor.YieldPoint(series.Name, series.Fields, series.Points[0])
		}
		return nil
	}

	fieldNames, iterators := self.getIterators(fields, startTimeBytes, endTimeBytes, query.Ascending)
	// Close every field iterator when the function returns.
	defer func() {
		for _, it := range iterators {
			it.Close()
		}
	}()

	seriesOutgoing := &protocol.Series{Name: protocol.String(seriesName), Fields: fieldNames, Points: make([]*protocol.Point, 0, self.pointBatchSize)}

	// TODO: clean up, this is super gnarly
	// optimize for the case where we're pulling back only a single column or aggregate
	//
	// buffer is scratch space for decoding the raw time/sequence bytes and
	// valueBuffer is reused to unmarshal every stored field value.
	buffer := bytes.NewBuffer(nil)
	valueBuffer := proto.NewBuffer(nil)
	for {
		// isValid becomes true once at least one field contributes a value
		// to the point built in this round.
		isValid := false
		point := &protocol.Point{Values: make([]*protocol.FieldValue, fieldCount, fieldCount)}

		// Step 1: fill every empty slot of rawColumnValues from its iterator.
		for i, it := range iterators {
			// Skip fields that still have a pending value or whose iterator
			// is exhausted; an iterator error aborts the query.
			if rawColumnValues[i].value != nil || !it.Valid() {
				if err := it.Error(); err != nil {
					return err
				}
				continue
			}

			key := it.Key()
			// Keys shorter than 16 bytes cannot be sliced into time and
			// sequence below, so they are ignored.
			if len(key) < 16 {
				continue
			}

			if !isPointInRange(fields[i].IdAsBytes(), startTimeBytes, endTimeBytes, key) {
				continue
			}

			value := it.Value()
			// Bytes 8..16 of the key are used as the raw timestamp and the
			// rest as the raw sequence number (presumably the first 8 bytes
			// are the field id -- TODO confirm).
			sequenceNumber := key[16:]

			rawTime := key[8:16]
			rawColumnValues[i] = rawColumnValue{time: rawTime, sequence: sequenceNumber, value: value}
		}

		var pointTimeRaw []byte
		var pointSequenceRaw []byte
		// Step 2: choose the highest (or lowest in case of ascending
		// queries) timestamp and sequence number among the pending values.
		// That will become the timestamp and sequence of the next point.
		for _, value := range rawColumnValues {
			if value.value == nil {
				continue
			}

			pointTimeRaw, pointSequenceRaw = value.updatePointTimeAndSequence(pointTimeRaw, pointSequenceRaw, query.Ascending)
		}

		// Step 3: build the point from the values matching the chosen
		// timestamp and sequence number.
		for i, iterator := range iterators {
			// if the value is nil or doesn't match the point's timestamp and sequence number
			// then skip it; the field is reported as null and any pending
			// value stays in its slot for a later round
			if rawColumnValues[i].value == nil ||
				!bytes.Equal(rawColumnValues[i].time, pointTimeRaw) ||
				!bytes.Equal(rawColumnValues[i].sequence, pointSequenceRaw) {

				point.Values[i] = &protocol.FieldValue{IsNull: &TRUE}
				continue
			}

			// if we emitted at least one column, then we should keep
			// trying to get more points
			isValid = true

			// advance the iterator to read a new value in the next iteration
			if query.Ascending {
				iterator.Next()
			} else {
				iterator.Prev()
			}

			fv := &protocol.FieldValue{}
			valueBuffer.SetBuf(rawColumnValues[i].value)
			err := valueBuffer.Unmarshal(fv)
			if err != nil {
				log.Error("Error while running query: %s", err)
				return err
			}
			point.Values[i] = fv
			// Mark the slot empty so step 1 refills it in the next round.
			rawColumnValues[i].value = nil
		}

		var sequence uint64
		var t uint64

		// set the point sequence number and timestamp by decoding the raw
		// bytes as big-endian uint64 values
		// NOTE(review): the results of Write and binary.Read are discarded,
		// and this runs before the isValid check, i.e. also in the final
		// round where no value was selected.
		buffer.Reset()
		buffer.Write(pointSequenceRaw)
		binary.Read(buffer, binary.BigEndian, &sequence)
		buffer.Reset()
		buffer.Write(pointTimeRaw)
		binary.Read(buffer, binary.BigEndian, &t)

		time := self.convertUintTimestampToInt64(&t)
		point.SetTimestampInMicroseconds(time)
		point.SequenceNumber = &sequence

		// stop the loop if we ran out of points
		if !isValid {
			break
		}

		shouldContinue := true

		seriesOutgoing.Points = append(seriesOutgoing.Points, point)

		// A full batch is yielded once per alias, all sharing the same
		// Points slice.
		if len(seriesOutgoing.Points) >= self.pointBatchSize {
			for _, alias := range aliases {
				series := &protocol.Series{
					Name:   proto.String(alias),
					Fields: fieldNames,
					Points: seriesOutgoing.Points,
				}
				// A false result does not end this loop: the remaining
				// aliases are still yielded before the outer loop stops.
				if !processor.YieldSeries(series) {
					log.Info("Stopping processing")
					shouldContinue = false
				}
			}
			seriesOutgoing = &protocol.Series{Name: protocol.String(seriesName), Fields: fieldNames, Points: make([]*protocol.Point, 0, self.pointBatchSize)}
		}

		if !shouldContinue {
			break
		}
	}

	// Yield remaining data. This runs on every exit from the loop above,
	// including after the processor asked to stop, in which case
	// seriesOutgoing was just reset and holds no points.
	for _, alias := range aliases {
		log.Debug("Final Flush %s", alias)
		series := &protocol.Series{Name: protocol.String(alias), Fields: seriesOutgoing.Fields, Points: seriesOutgoing.Points}
		if !processor.YieldSeries(series) {
			log.Debug("Cancelled...")
		}
	}

	log.Debug("Finished running query %s", query.GetQueryString())
	return nil
}
func (self *ShardData) Query(querySpec *parser.QuerySpec, response chan *p.Response) { log.Debug("QUERY: shard %d, query '%s'", self.Id(), querySpec.GetQueryString()) defer common.RecoverFunc(querySpec.Database(), querySpec.GetQueryString(), func(err interface{}) { response <- &p.Response{Type: &endStreamResponse, ErrorMessage: p.String(fmt.Sprintf("%s", err))} }) // This is only for queries that are deletes or drops. They need to be sent everywhere as opposed to just the local or one of the remote shards. // But this boolean should only be set to true on the server that receives the initial query. if querySpec.RunAgainstAllServersInShard { if querySpec.IsDeleteFromSeriesQuery() { self.logAndHandleDeleteQuery(querySpec, response) } else if querySpec.IsDropSeriesQuery() { self.logAndHandleDropSeriesQuery(querySpec, response) } } if self.IsLocal { var processor QueryProcessor var err error if querySpec.IsListSeriesQuery() { processor = engine.NewListSeriesEngine(response) } else if querySpec.IsDeleteFromSeriesQuery() || querySpec.IsDropSeriesQuery() || querySpec.IsSinglePointQuery() { maxDeleteResults := 10000 processor = engine.NewPassthroughEngine(response, maxDeleteResults) } else { query := querySpec.SelectQuery() if self.ShouldAggregateLocally(querySpec) { log.Debug("creating a query engine") processor, err = engine.NewQueryEngine(query, response) if err != nil { response <- &p.Response{Type: &endStreamResponse, ErrorMessage: p.String(err.Error())} log.Error("Error while creating engine: %s", err) return } processor.SetShardInfo(int(self.Id()), self.IsLocal) } else if query.HasAggregates() { maxPointsToBufferBeforeSending := 1000 log.Debug("creating a passthrough engine") processor = engine.NewPassthroughEngine(response, maxPointsToBufferBeforeSending) } else { maxPointsToBufferBeforeSending := 1000 log.Debug("creating a passthrough engine with limit") processor = engine.NewPassthroughEngineWithLimit(response, maxPointsToBufferBeforeSending, query.Limit) } if 
query.GetFromClause().Type != parser.FromClauseInnerJoin { // Joins do their own filtering since we need to get all // points before filtering. This is due to the fact that some // where expressions will be difficult to compute before the // points are joined together, think where clause with // left.column = 'something' or right.column = // 'something_else'. We can't filter the individual series // separately. The filtering happens in merge.go:55 processor = engine.NewFilteringEngine(query, processor) } } shard, err := self.store.GetOrCreateShard(self.id) if err != nil { response <- &p.Response{Type: &endStreamResponse, ErrorMessage: p.String(err.Error())} log.Error("Error while getting shards: %s", err) return } defer self.store.ReturnShard(self.id) err = shard.Query(querySpec, processor) // if we call Close() in case of an error it will mask the error if err != nil { response <- &p.Response{Type: &endStreamResponse, ErrorMessage: p.String(err.Error())} return } processor.Close() response <- &p.Response{Type: &endStreamResponse} return } if server := self.randomHealthyServer(); server != nil { log.Debug("Querying server %d for shard %d", server.GetId(), self.Id()) request := self.createRequest(querySpec) server.MakeRequest(request, response) return } message := fmt.Sprintf("No servers up to query shard %d", self.id) response <- &p.Response{Type: &endStreamResponse, ErrorMessage: &message} log.Error(message) }