func (self *ClusterConfiguration) getShardRange(querySpec QuerySpec, shards []*ShardData) []*ShardData {
	if querySpec.AllShardsQuery() {
		return shards
	}

	startTime := common.TimeToMicroseconds(querySpec.GetStartTime())
	endTime := common.TimeToMicroseconds(querySpec.GetEndTime())

	// The shards are always in descending order. If we have the following shards
	// [t + 20, t + 30], [t + 10, t + 20], [t, t + 10]
	// and we are querying [t + 5, t + 15], we have to find the first shard whose
	// startMicro is less than the end time of the query, which is the second
	// shard [t + 10, t + 20]. Then, starting from this shard, we search for the
	// shard whose endMicro is less than the start time of the query, which is
	// no entry (sort.Search returns the length of the slice in this case), so
	// we return [t + 10, t + 20], [t, t + 10] as expected.
	startIndex := sort.Search(len(shards), func(n int) bool {
		return shards[n].startMicro < endTime
	})

	if startIndex == len(shards) {
		return nil
	}

	endIndex := sort.Search(len(shards)-startIndex, func(n int) bool {
		return shards[n+startIndex].endMicro <= startTime
	})

	return shards[startIndex : endIndex+startIndex]
}
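For reference, the two-stage sort.Search above can be exercised in isolation. The sketch below reproduces the index arithmetic on a stripped-down shard type (shard and shardRange are hypothetical stand-ins, not part of the cluster package), using the example from the comment with t = 0.

package main

import (
	"fmt"
	"sort"
)

type shard struct{ startMicro, endMicro int64 }

// shardRange mirrors the index arithmetic in getShardRange: find the first
// shard overlapping the query's end, then the first shard entirely before
// the query's start, and return everything in between.
func shardRange(shards []shard, startTime, endTime int64) []shard {
	startIndex := sort.Search(len(shards), func(n int) bool {
		return shards[n].startMicro < endTime
	})
	if startIndex == len(shards) {
		return nil
	}
	endIndex := sort.Search(len(shards)-startIndex, func(n int) bool {
		return shards[n+startIndex].endMicro <= startTime
	})
	return shards[startIndex : endIndex+startIndex]
}

func main() {
	// [t+20, t+30], [t+10, t+20], [t, t+10] with t = 0, descending order
	shards := []shard{{20, 30}, {10, 20}, {0, 10}}
	// querying [t+5, t+15] should return the last two shards
	fmt.Println(shardRange(shards, 5, 15)) // [{10 20} {0 10}]
}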
func (self *QueryParserSuite) TestGetQueryStringForContinuousQuery(c *C) {
	base := time.Now().Truncate(time.Minute)

	start := base.UTC()
	end := base.Add(time.Minute).UTC()

	startMicroseconds := common.TimeToMicroseconds(start.UTC()) - 1
	endMicroseconds := common.TimeToMicroseconds(end.UTC())

	inputQuery := "select count(c1) from s1 group by time(1m) into d1;"
	outputQuery := fmt.Sprintf("select count(c1) from s1 group by time(1m) where time > %du and time < %du", startMicroseconds, endMicroseconds)

	queries, err := ParseQuery(inputQuery)
	c.Assert(err, IsNil)
	c.Assert(queries, HasLen, 1)

	query := queries[0]
	c.Assert(query.SelectQuery, NotNil)

	selectQuery := query.SelectQuery
	c.Assert(selectQuery.GetQueryStringForContinuousQuery(start, end), Equals, outputQuery)

	// try to parse the query with the time condition
	queries, err = ParseQuery(selectQuery.GetQueryStringForContinuousQuery(start, end))
	c.Assert(err, IsNil)

	query = queries[0]
	c.Assert(query.SelectQuery, NotNil)

	selectQuery = query.SelectQuery
	c.Assert(selectQuery.GetStartTime().Round(time.Second), Equals, start)
	c.Assert(selectQuery.GetEndTime(), Equals, end)
}
func (self *SelectDeleteCommonQuery) GetQueryStringForContinuousQuery(start, end time.Time) string {
	queryString := self.GetQueryString()
	queryString = strings.TrimSuffix(queryString, ";")

	// drop the "into <series>" clause; the time condition replaces it
	intoRegex := regexp.MustCompile(`(?i)\s+into\s+`)
	components := intoRegex.Split(queryString, 2)
	queryString = components[0]

	startTime := common.TimeToMicroseconds(start)
	startTimeStr := strconv.FormatInt(startTime-1, 10)
	endTime := common.TimeToMicroseconds(end)
	endTimeStr := strconv.FormatInt(endTime, 10)

	if self.GetWhereCondition() == nil {
		queryString = queryString + " where "
	} else {
		queryString = queryString + " and "
	}

	if start.IsZero() {
		return queryString + "time < " + endTimeStr + "u"
	}
	return queryString + "time > " + startTimeStr + "u and time < " + endTimeStr + "u"
}
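The transformation is easy to demonstrate standalone. The sketch below mirrors the rewrite for the common case of a query with no existing where clause; rewriteForContinuousQuery is a hypothetical helper using only the standard library, and TimeToMicroseconds is approximated here as UnixNano()/1000.

package main

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"time"
)

var intoRegex = regexp.MustCompile(`(?i)\s+into\s+`)

// rewriteForContinuousQuery strips a trailing semicolon and the "into" clause,
// then appends the time condition the continuous-query runner needs.
func rewriteForContinuousQuery(query string, start, end time.Time) string {
	query = strings.TrimSuffix(query, ";")
	query = intoRegex.Split(query, 2)[0]

	endStr := strconv.FormatInt(end.UnixNano()/1000, 10)
	if start.IsZero() {
		return query + " where time < " + endStr + "u"
	}
	// subtracting 1 makes the lower bound inclusive of points at exactly `start`
	startStr := strconv.FormatInt(start.UnixNano()/1000-1, 10)
	return query + " where time > " + startStr + "u and time < " + endStr + "u"
}

func main() {
	start := time.Date(2014, 1, 1, 0, 0, 0, 0, time.UTC)
	end := start.Add(time.Minute)
	fmt.Println(rewriteForContinuousQuery("select count(c1) from s1 group by time(1m) into d1;", start, end))
}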
func NewShard(id uint32, startTime, endTime time.Time, shardType ShardType, durationIsSplit bool, wal WAL) *ShardData {
	return &ShardData{
		id:              id,
		startTime:       startTime,
		endTime:         endTime,
		wal:             wal,
		startMicro:      common.TimeToMicroseconds(startTime),
		endMicro:        common.TimeToMicroseconds(endTime),
		serverIds:       make([]uint32, 0),
		shardType:       shardType,
		durationIsSplit: durationIsSplit,
		shardDuration:   endTime.Sub(startTime),
	}
}
func (self *QueryParserSuite) TestGetQueryStringWithTimeCondition(c *C) {
	now := time.Now().Round(time.Minute).UTC()
	micros := common.TimeToMicroseconds(now)

	for _, q := range []string{
		"delete from foo",
		fmt.Sprintf("delete from foo where time < %du", micros),
	} {
		fmt.Printf("testing %s\n", q)

		queries, err := ParseQuery(q)
		c.Assert(err, IsNil)
		c.Assert(queries, HasLen, 1)

		_q := queries[0]
		c.Assert(_q.DeleteQuery, NotNil)

		q := _q.DeleteQuery

		// try to parse the query with the time condition
		queries, err = ParseQuery(q.GetQueryStringWithTimeCondition())
		fmt.Printf("query: %s\n", q.GetQueryStringWithTimeCondition())
		c.Assert(err, IsNil)

		_q = queries[0]
		c.Assert(_q.DeleteQuery, NotNil)

		q = _q.DeleteQuery
		c.Assert(q.GetEndTime().Round(time.Minute), Equals, now)
	}
}
func (self *SelectDeleteCommonQuery) GetQueryStringWithTimeCondition() string {
	queryString := self.GetQueryString()

	if self.endTimeSet {
		return queryString
	}

	t := common.TimeToMicroseconds(self.GetEndTime())
	timeStr := strconv.FormatInt(t, 10)

	condition := self.GetWhereCondition()
	if condition == nil {
		return queryString + " where time < " + timeStr + "u"
	}

	return queryString + " and time < " + timeStr + "u"
}
func (self *LevelDbDatastore) DeleteRangeOfSeries(database, series string, startTime, endTime time.Time) error {
	columns := self.getColumnNamesForSeries(database, series)
	fields, err := self.getFieldsForSeries(database, series, columns)
	if err != nil {
		return err
	}

	startTimeBytes, endTimeBytes := self.byteArraysForStartAndEndTimes(common.TimeToMicroseconds(startTime), common.TimeToMicroseconds(endTime))

	ro := levigo.NewReadOptions()
	defer ro.Close()
	ro.SetFillCache(false)

	rangesToCompact := make([]*levigo.Range, 0)
	for _, field := range fields {
		it := self.db.NewIterator(ro)
		defer it.Close()
		wb := levigo.NewWriteBatch()

		startKey := append(field.Id, startTimeBytes...)
		endKey := startKey
		it.Seek(startKey)
		if it.Valid() {
			if !bytes.Equal(it.Key()[:8], field.Id) {
				it.Next()
				if it.Valid() {
					startKey = it.Key()
				}
			}
		}

		for ; it.Valid(); it.Next() {
			k := it.Key()
			if len(k) < 16 || !bytes.Equal(k[:8], field.Id) || bytes.Compare(k[8:16], endTimeBytes) == 1 {
				break
			}
			wb.Delete(k)
			endKey = k
		}

		err = self.db.Write(self.writeOptions, wb)
		if err != nil {
			return err
		}
		rangesToCompact = append(rangesToCompact, &levigo.Range{startKey, endKey})
	}

	for _, r := range rangesToCompact {
		self.db.CompactRange(*r)
	}

	return nil
}
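The iteration above assumes keys of the form [field id (8 bytes) | timestamp (8 bytes) | sequence number (8 bytes)], which is why k[:8] is compared against field.Id and k[8:16] against endTimeBytes. A small sketch of that layout (makeKey is a hypothetical helper for illustration, not part of the datastore):

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// makeKey builds a 24-byte key: field id, timestamp, sequence, all big-endian
// so that lexicographic byte order matches numeric order.
func makeKey(fieldID, timeMicro, sequence uint64) []byte {
	key := make([]byte, 24)
	binary.BigEndian.PutUint64(key[0:8], fieldID)
	binary.BigEndian.PutUint64(key[8:16], timeMicro)
	binary.BigEndian.PutUint64(key[16:24], sequence)
	return key
}

func main() {
	a := makeKey(1, 100, 1)
	b := makeKey(1, 200, 1)
	// because timestamps are big-endian, byte comparison orders keys by time
	fmt.Println(bytes.Compare(a, b)) // -1

	// the delete loop stops once key[8:16] exceeds endTimeBytes
	end := makeKey(1, 150, 0)
	fmt.Println(bytes.Compare(b[8:16], end[8:16]) == 1) // true: past the range
}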
func (self *Shard) fetchSinglePoint(querySpec *parser.QuerySpec, series string, fields []*metastore.Field) (*protocol.Series, error) {
	query := querySpec.SelectQuery()
	fieldCount := len(fields)
	fieldNames := make([]string, 0, fieldCount)
	point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
	timestamp := common.TimeToMicroseconds(query.GetStartTime())
	sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
	if err != nil {
		return nil, err
	}

	timeAndSequenceBuffer := bytes.NewBuffer(make([]byte, 0, 16))
	binary.Write(timeAndSequenceBuffer, binary.BigEndian, self.convertTimestampToUint(&timestamp))
	binary.Write(timeAndSequenceBuffer, binary.BigEndian, sequenceNumber)
	sequenceNumber_uint64 := uint64(sequenceNumber)
	point.SequenceNumber = &sequenceNumber_uint64
	point.SetTimestampInMicroseconds(timestamp)

	timeAndSequenceBytes := timeAndSequenceBuffer.Bytes()
	for _, field := range fields {
		pointKeyBuff := bytes.NewBuffer(make([]byte, 0, 24))
		pointKeyBuff.Write(field.IdAsBytes())
		pointKeyBuff.Write(timeAndSequenceBytes)
		if data, err := self.db.Get(pointKeyBuff.Bytes()); err != nil {
			return nil, err
		} else {
			fieldValue := &protocol.FieldValue{}
			err := proto.Unmarshal(data, fieldValue)
			if err != nil {
				return nil, err
			}
			if data != nil {
				fieldNames = append(fieldNames, field.Name)
				point.Values = append(point.Values, fieldValue)
			}
		}
	}

	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: []*protocol.Point{point}}

	return result, nil
}
func (self *LevelDbDatastore) fetchSinglePoint(database, series string, fields []*Field, query *parser.SelectQuery) (*protocol.Series, error) {
	fieldCount := len(fields)
	fieldNames := make([]string, 0, fieldCount)
	point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
	timestampBuffer := bytes.NewBuffer(make([]byte, 0, 8))
	sequenceNumberBuffer := bytes.NewBuffer(make([]byte, 0, 8))
	timestamp := common.TimeToMicroseconds(query.GetStartTime())
	sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
	if err != nil {
		return nil, err
	}

	binary.Write(timestampBuffer, binary.BigEndian, self.convertTimestampToUint(&timestamp))
	binary.Write(sequenceNumberBuffer, binary.BigEndian, sequenceNumber)
	sequenceNumber_uint64 := uint64(sequenceNumber)
	point.SequenceNumber = &sequenceNumber_uint64
	point.SetTimestampInMicroseconds(timestamp)

	for _, field := range fields {
		pointKey := append(append(field.Id, timestampBuffer.Bytes()...), sequenceNumberBuffer.Bytes()...)
		if data, err := self.db.Get(self.readOptions, pointKey); err != nil {
			return nil, err
		} else {
			fieldValue := &protocol.FieldValue{}
			err := proto.Unmarshal(data, fieldValue)
			if err != nil {
				return nil, err
			}
			if data != nil {
				fieldNames = append(fieldNames, field.Name)
				point.Values = append(point.Values, fieldValue)
			}
		}
	}

	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: []*protocol.Point{point}}

	return result, nil
}
func (self *Shard) byteArrayForTime(t time.Time) []byte {
	timeBuffer := bytes.NewBuffer(make([]byte, 0, 8))
	timeMicro := common.TimeToMicroseconds(t)
	binary.Write(timeBuffer, binary.BigEndian, self.convertTimestampToUint(&timeMicro))
	return timeBuffer.Bytes()
}
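The big-endian encoding matters here: it makes lexicographic comparison of key bytes equivalent to numeric comparison of timestamps. One common way to make signed microsecond timestamps sort correctly as unsigned bytes, shown below purely as an assumption (the real convertTimestampToUint may differ), is to shift the signed range by 2^63 so pre-epoch timestamps order before post-epoch ones.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"math"
)

// toSortableUint maps int64 microseconds onto uint64 so that unsigned
// big-endian byte order matches signed time order (a sketch, not the
// actual convertTimestampToUint implementation).
func toSortableUint(micro int64) uint64 {
	return uint64(micro) + uint64(math.MaxInt64) + 1
}

func timeBytes(micro int64) []byte {
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, toSortableUint(micro))
	return buf
}

func main() {
	// -1µs (just before the epoch) must sort before +1µs
	fmt.Println(bytes.Compare(timeBytes(-1), timeBytes(1))) // -1
}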
func (self *Shard) deleteRangeOfSeries(database, series string, startTime, endTime time.Time) error {
	startTimeBytes, endTimeBytes := self.byteArraysForStartAndEndTimes(common.TimeToMicroseconds(startTime), common.TimeToMicroseconds(endTime))
	return self.deleteRangeOfSeriesCommon(database, series, startTimeBytes, endTimeBytes)
}
func (self *LevelDbDatastore) executeQueryForSeries(database, series string, columns []string, query *parser.SelectQuery, yield func(*protocol.Series) error, ringFilter func(database, series *string, time *int64) bool) error {
	startTimeBytes, endTimeBytes := self.byteArraysForStartAndEndTimes(common.TimeToMicroseconds(query.GetStartTime()), common.TimeToMicroseconds(query.GetEndTime()))
	emptyResult := &protocol.Series{Name: &series, Points: nil}

	fields, err := self.getFieldsForSeries(database, series, columns)
	if err != nil {
		// because a db is distributed across the cluster, it's possible we don't have the series indexed here. ignore
		switch err := err.(type) {
		case FieldLookupError:
			return yield(emptyResult)
		default:
			return err
		}
	}

	fieldCount := len(fields)
	rawColumnValues := make([]*rawColumnValue, fieldCount, fieldCount)

	if query.IsSinglePointQuery() {
		result, err := self.fetchSinglePoint(database, series, fields, query)
		if err != nil {
			return err
		}
		if err := yield(result); err != nil {
			return err
		}
		return nil
	}

	fieldNames, iterators := self.getIterators(fields, startTimeBytes, endTimeBytes, query.Ascending)
	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}

	limit := query.Limit
	shouldLimit := true
	if limit == 0 {
		limit = -1
		shouldLimit = false
	}

	resultByteCount := 0

	// TODO: clean up, this is super gnarly
	// optimize for the case where we're pulling back only a single column or aggregate
	for {
		isValid := false
		point := &protocol.Point{Values: make([]*protocol.FieldValue, fieldCount, fieldCount)}

		for i, it := range iterators {
			if rawColumnValues[i] != nil || !it.Valid() {
				continue
			}

			key := it.Key()
			if len(key) < 16 {
				continue
			}

			if !isPointInRange(fields[i].Id, startTimeBytes, endTimeBytes, key) {
				continue
			}

			value := it.Value()
			sequenceNumber := key[16:]
			rawTime := key[8:16]
			rawValue := &rawColumnValue{time: rawTime, sequence: sequenceNumber, value: value}
			rawColumnValues[i] = rawValue
		}

		var pointTimeRaw []byte
		var pointSequenceRaw []byte
		// choose the highest (or lowest in case of ascending queries) timestamp
		// and sequence number. that will become the timestamp and sequence of
		// the next point.
		for _, value := range rawColumnValues {
			if value == nil {
				continue
			}

			pointTimeRaw, pointSequenceRaw = value.updatePointTimeAndSequence(pointTimeRaw, pointSequenceRaw, query.Ascending)
		}

		for i, iterator := range iterators {
			// if the value is nil or doesn't match the point's timestamp and sequence number,
			// then skip it
			if rawColumnValues[i] == nil ||
				!bytes.Equal(rawColumnValues[i].time, pointTimeRaw) ||
				!bytes.Equal(rawColumnValues[i].sequence, pointSequenceRaw) {

				point.Values[i] = &protocol.FieldValue{IsNull: &TRUE}
				continue
			}

			// if we emitted at least one column, then we should keep
			// trying to get more points
			isValid = true

			// advance the iterator to read a new value in the next iteration
			if query.Ascending {
				iterator.Next()
			} else {
				iterator.Prev()
			}

			fv := &protocol.FieldValue{}
			resultByteCount += len(rawColumnValues[i].value)
			err := proto.Unmarshal(rawColumnValues[i].value, fv)
			if err != nil {
				return err
			}
			point.Values[i] = fv
			rawColumnValues[i] = nil
		}

		var sequence uint64
		// set the point sequence number and timestamp
		binary.Read(bytes.NewBuffer(pointSequenceRaw), binary.BigEndian, &sequence)
		var t uint64
		binary.Read(bytes.NewBuffer(pointTimeRaw), binary.BigEndian, &t)
		time := self.convertUintTimestampToInt64(&t)
		point.SetTimestampInMicroseconds(time)
		point.SequenceNumber = &sequence

		// stop the loop if we ran out of points
		if !isValid {
			break
		}

		limit -= 1

		if ringFilter != nil && ringFilter(&database, &series, point.Timestamp) {
			continue
		}

		result.Points = append(result.Points, point)

		// add byte count for the timestamp and the sequence
		resultByteCount += 16

		// check if we should send the batch along
		if resultByteCount > MAX_SERIES_SIZE || (shouldLimit && limit == 0) {
			dropped, err := self.sendBatch(query, result, yield)
			if err != nil {
				return err
			}
			limit += dropped
			resultByteCount = 0
			result = &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}
		}

		if shouldLimit && limit < 1 {
			break
		}
	}

	if _, err := self.sendBatch(query, result, yield); err != nil {
		return err
	}
	_, err = self.sendBatch(query, emptyResult, yield)
	return err
}
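The merge rule buried in that loop is worth isolating: among the values buffered for each column, the next emitted point takes the smallest (time, sequence) pair for ascending queries and the largest for descending ones. The free function below is an illustrative stand-in for the rawColumnValue method of the same name; plain byte slices suffice because big-endian encoding makes bytes.Compare agree with numeric order.

package main

import (
	"bytes"
	"fmt"
)

// updatePointTimeAndSequence keeps the current candidate (time, sequence)
// unless this column's buffered value should be emitted first.
func updatePointTimeAndSequence(curTime, curSeq, t, s []byte, ascending bool) ([]byte, []byte) {
	if curTime == nil {
		return t, s
	}
	cmp := bytes.Compare(t, curTime)
	if cmp == 0 {
		cmp = bytes.Compare(s, curSeq)
	}
	if (ascending && cmp < 0) || (!ascending && cmp > 0) {
		return t, s
	}
	return curTime, curSeq
}

func main() {
	// two columns buffered at times 5 and 3; an ascending scan picks time 3
	tA, sA := []byte{0, 5}, []byte{0, 1}
	tB, sB := []byte{0, 3}, []byte{0, 2}

	pt, ps := updatePointTimeAndSequence(nil, nil, tA, sA, true)
	pt, ps = updatePointTimeAndSequence(pt, ps, tB, sB, true)
	fmt.Println(pt, ps) // [0 3] [0 2]
}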
func (self *LevelDbDatastore) executeQueryForSeries(database, series string, columns []string, query *parser.Query, yield func(*protocol.Series) error) error {
	startTimeBytes, endTimeBytes := self.byteArraysForStartAndEndTimes(common.TimeToMicroseconds(query.GetStartTime()), common.TimeToMicroseconds(query.GetEndTime()))

	fields, err := self.getFieldsForSeries(database, series, columns)
	if err != nil {
		return err
	}
	fieldCount := len(fields)
	prefixes := make([][]byte, fieldCount, fieldCount)
	iterators := make([]*levigo.Iterator, fieldCount, fieldCount)
	fieldNames := make([]string, len(fields))

	// start the iterators to go through the series data
	for i, field := range fields {
		fieldNames[i] = field.Name
		prefixes[i] = field.Id
		iterators[i] = self.db.NewIterator(self.readOptions)
		if query.Ascending {
			iterators[i].Seek(append(field.Id, startTimeBytes...))
		} else {
			iterators[i].Seek(append(append(field.Id, endTimeBytes...), MAX_SEQUENCE...))
			if iterators[i].Valid() {
				iterators[i].Prev()
			}
		}
	}

	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}
	rawColumnValues := make([]*rawColumnValue, fieldCount, fieldCount)
	isValid := true

	limit := query.Limit
	if limit == 0 {
		limit = MAX_POINTS_TO_SCAN
	}

	resultByteCount := 0

	// TODO: clean up, this is super gnarly
	// optimize for the case where we're pulling back only a single column or aggregate
	for isValid {
		isValid = false
		latestTimeRaw := make([]byte, 8, 8)
		latestSequenceRaw := make([]byte, 8, 8)
		point := &protocol.Point{Values: make([]*protocol.FieldValue, fieldCount, fieldCount)}

		for i, it := range iterators {
			if rawColumnValues[i] == nil && it.Valid() {
				k := it.Key()
				if len(k) >= 16 {
					t := k[8:16]
					if bytes.Equal(k[:8], fields[i].Id) && bytes.Compare(t, startTimeBytes) > -1 && bytes.Compare(t, endTimeBytes) < 1 {
						v := it.Value()
						s := k[16:]
						rawColumnValues[i] = &rawColumnValue{time: t, sequence: s, value: v}
						timeCompare := bytes.Compare(t, latestTimeRaw)
						if timeCompare == 1 {
							latestTimeRaw = t
							latestSequenceRaw = s
						} else if timeCompare == 0 {
							if bytes.Compare(s, latestSequenceRaw) == 1 {
								latestSequenceRaw = s
							}
						}
					}
				}
			}
		}

		for i, iterator := range iterators {
			if rawColumnValues[i] != nil && bytes.Equal(rawColumnValues[i].time, latestTimeRaw) && bytes.Equal(rawColumnValues[i].sequence, latestSequenceRaw) {
				isValid = true
				if query.Ascending {
					iterator.Next()
				} else {
					iterator.Prev()
				}
				fv := &protocol.FieldValue{}
				err := proto.Unmarshal(rawColumnValues[i].value, fv)
				if err != nil {
					return err
				}
				resultByteCount += len(rawColumnValues[i].value)
				point.Values[i] = fv
				var t uint64
				binary.Read(bytes.NewBuffer(rawColumnValues[i].time), binary.BigEndian, &t)
				time := self.convertUintTimestampToInt64(&t)
				var sequence uint64
				binary.Read(bytes.NewBuffer(rawColumnValues[i].sequence), binary.BigEndian, &sequence)
				seq32 := uint32(sequence)
				point.SetTimestampInMicroseconds(time)
				point.SequenceNumber = &seq32
				rawColumnValues[i] = nil
			}
		}

		if isValid {
			limit -= 1
			result.Points = append(result.Points, point)

			// add byte count for the timestamp and the sequence
			resultByteCount += 16

			// check if we should send the batch along
			if resultByteCount > MAX_SERIES_SIZE {
				filteredResult, _ := Filter(query, result)
				if err := yield(filteredResult); err != nil {
					return err
				}
				resultByteCount = 0
				result = &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}
			}
		}

		if limit < 1 {
			break
		}
	}

	filteredResult, _ := Filter(query, result)
	if err := yield(filteredResult); err != nil {
		return err
	}

	emptyResult := &protocol.Series{Name: &series, Fields: fieldNames, Points: nil}
	return yield(emptyResult)
}
func (self *LevelDbDatastore) executeQueryForSeries(database, series string, columns []string, query *parser.Query, yield func(*protocol.Series) error) error {
	startTimeBytes, endTimeBytes := self.byteArraysForStartAndEndTimes(common.TimeToMicroseconds(query.GetStartTime()), common.TimeToMicroseconds(query.GetEndTime()))

	fields, err := self.getFieldsForSeries(database, series, columns)
	if err != nil {
		return err
	}
	fieldCount := len(fields)

	fieldNames, iterators := self.getIterators(fields, startTimeBytes, endTimeBytes, query.Ascending)

	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}
	rawColumnValues := make([]*rawColumnValue, fieldCount, fieldCount)

	limit := query.Limit
	if limit == 0 {
		limit = MAX_POINTS_TO_SCAN
	}

	resultByteCount := 0

	// TODO: clean up, this is super gnarly
	// optimize for the case where we're pulling back only a single column or aggregate
	for {
		isValid := false
		point := &protocol.Point{Values: make([]*protocol.FieldValue, fieldCount, fieldCount)}

		for i, it := range iterators {
			if rawColumnValues[i] != nil || !it.Valid() {
				continue
			}

			key := it.Key()
			if len(key) < 16 {
				continue
			}

			if !isPointInRange(fields[i].Id, startTimeBytes, endTimeBytes, key) {
				continue
			}

			time := key[8:16]
			value := it.Value()
			sequenceNumber := key[16:]

			rawValue := &rawColumnValue{time: time, sequence: sequenceNumber, value: value}
			rawColumnValues[i] = rawValue
		}

		var pointTimeRaw []byte
		var pointSequenceRaw []byte
		// choose the highest (or lowest in case of ascending queries) timestamp
		// and sequence number. that will become the timestamp and sequence of
		// the next point.
		for _, value := range rawColumnValues {
			if value == nil {
				continue
			}

			pointTimeRaw, pointSequenceRaw = value.updatePointTimeAndSequence(pointTimeRaw, pointSequenceRaw, query.Ascending)
		}

		for i, iterator := range iterators {
			// if the value is nil, or doesn't match the point's timestamp and sequence number,
			// then skip it
			if rawColumnValues[i] == nil ||
				!bytes.Equal(rawColumnValues[i].time, pointTimeRaw) ||
				!bytes.Equal(rawColumnValues[i].sequence, pointSequenceRaw) {
				continue
			}

			// if we emitted at least one column, then we should keep
			// trying to get more points
			isValid = true

			// advance the iterator to read a new value in the next iteration
			if query.Ascending {
				iterator.Next()
			} else {
				iterator.Prev()
			}

			fv := &protocol.FieldValue{}
			err := proto.Unmarshal(rawColumnValues[i].value, fv)
			if err != nil {
				return err
			}
			resultByteCount += len(rawColumnValues[i].value)
			point.Values[i] = fv
			var t uint64
			binary.Read(bytes.NewBuffer(rawColumnValues[i].time), binary.BigEndian, &t)
			time := self.convertUintTimestampToInt64(&t)
			var sequence uint64
			binary.Read(bytes.NewBuffer(rawColumnValues[i].sequence), binary.BigEndian, &sequence)
			seq32 := uint32(sequence)
			point.SetTimestampInMicroseconds(time)
			point.SequenceNumber = &seq32
			rawColumnValues[i] = nil
		}

		// stop the loop if we ran out of points
		if !isValid {
			break
		}

		limit -= 1
		result.Points = append(result.Points, point)

		// add byte count for the timestamp and the sequence
		resultByteCount += 16

		// check if we should send the batch along
		if resultByteCount > MAX_SERIES_SIZE || limit < 1 {
			dropped, err := self.sendBatch(query, result, yield)
			if err != nil {
				return err
			}
			limit += dropped
			resultByteCount = 0
			result = &protocol.Series{Name: &series, Fields: fieldNames, Points: make([]*protocol.Point, 0)}
		}

		if limit < 1 {
			break
		}
	}

	if _, err := self.sendBatch(query, result, yield); err != nil {
		return err
	}

	emptyResult := &protocol.Series{Name: &series, Fields: fieldNames, Points: nil}
	_, err = self.sendBatch(query, emptyResult, yield)
	return err
}