func (e *DeleteNode) runDelete(snapshot []byte) error { e.fieldsDeleted = &expvar.Int{} e.tagsDeleted = &expvar.Int{} e.statMap.Set(statsFieldsDeleted, e.fieldsDeleted) e.statMap.Set(statsTagsDeleted, e.tagsDeleted) switch e.Provides() { case pipeline.StreamEdge: for p, ok := e.ins[0].NextPoint(); ok; p, ok = e.ins[0].NextPoint() { e.timer.Start() p.Fields, p.Tags = e.doDeletes(p.Fields, p.Tags) // Check if we deleted a group by dimension updateDims := false for _, dim := range p.Dimensions.TagNames { if !e.tags[dim] { updateDims = true break } } if updateDims { newDims := make([]string, 0, len(p.Dimensions.TagNames)) for _, dim := range p.Dimensions.TagNames { if !e.tags[dim] { newDims = append(newDims, dim) } } p.Dimensions.TagNames = newDims p.Group = models.ToGroupID(p.Name, p.Tags, p.Dimensions) } e.timer.Stop() for _, child := range e.outs { err := child.CollectPoint(p) if err != nil { return err } } } case pipeline.BatchEdge: for b, ok := e.ins[0].NextBatch(); ok; b, ok = e.ins[0].NextBatch() { e.timer.Start() for i := range b.Points { b.Points[i].Fields, b.Points[i].Tags = e.doDeletes(b.Points[i].Fields, b.Points[i].Tags) } _, newTags := e.doDeletes(nil, b.Tags) if len(newTags) != len(b.Tags) { b.Tags = newTags b.Group = models.ToGroupID(b.Name, b.Tags, b.PointDimensions()) } e.timer.Stop() for _, child := range e.outs { err := child.CollectBatch(b) if err != nil { return err } } } } return nil }
func BenchmarkCollectPoint(b *testing.B) { name := "point" b.ReportAllocs() ls := &logService{} e := newEdge("BCollectPoint", "parent", "child", pipeline.StreamEdge, defaultEdgeBufferSize, ls) p := models.Point{ Name: name, Tags: models.Tags{ "tag1": "value1", "tag2": "value2", "tag3": "value3", "tag4": "value4", }, Group: models.ToGroupID(name, nil, models.Dimensions{}), Fields: models.Fields{ "field1": 42, "field2": 4.2, "field3": 49, "field4": 4.9, }, } b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { e.CollectPoint(p) e.NextPoint() } }) }
func setGroupOnPoint(p models.Point, allDimensions bool, dimensions models.Dimensions) models.Point { if allDimensions { dimensions.TagNames = models.SortedKeys(p.Tags) } p.Group = models.ToGroupID(p.Name, p.Tags, dimensions) p.Dimensions = dimensions return p }
// Replay the batch data from a single source func readBatchFromIO(data io.ReadCloser, batches chan<- models.Batch) error { defer close(batches) defer data.Close() dec := json.NewDecoder(data) for dec.More() { var b models.Batch err := dec.Decode(&b) if err != nil { return err } if len(b.Points) == 0 { // do nothing continue } if b.Group == "" { b.Group = models.ToGroupID( b.Name, b.Tags, models.Dimensions{ ByName: b.ByName, TagNames: models.SortedKeys(b.Tags), }, ) } // Add tags to all points if len(b.Tags) > 0 { for i := range b.Points { if len(b.Points[i].Tags) == 0 { b.Points[i].Tags = b.Tags } } } batches <- b } return nil }
// The purpose of this method is to match more specific points
// with the less specific points as they arrive.
//
// Where 'more specific' means, that a point has more dimensions than the join.on dimensions.
func (j *JoinNode) matchPoints(p srcPoint, groupErrs chan<- error) {
	// Specific points may be sent to the joinset without a matching point, but not the other way around.
	// This is because the specific points have the needed specific tag data.
	// The joinset will later handle the fill inner/outer join operations.
	j.mu.Lock()
	defer j.mu.Unlock()

	// Track which parent edges have sent at least one point; lowMark is only
	// trustworthy once every parent has reported.
	if !j.allReported {
		j.reported[p.src] = true
		j.allReported = len(j.reported) == len(j.ins)
	}
	// All time comparisons happen on times rounded to the join tolerance.
	t := p.p.PointTime().Round(j.j.Tolerance)

	// Group identity is computed over the join.on dimensions only, so the
	// specific and less-specific sides of the join land in the same group.
	groupId := models.ToGroupID(
		p.p.PointName(),
		p.p.PointTags(),
		models.Dimensions{
			ByName:   p.p.PointDimensions().ByName,
			TagNames: j.j.Dimensions,
		},
	)
	// Update current srcGroup lowMark
	srcG := srcGroup{src: p.src, groupId: groupId}
	j.lowMarks[srcG] = t

	// Determine lowMark, the oldest time per parent per group.
	// NOTE(review): a parent that has never sent for this group contributes a
	// zero time here, which Before() treats as oldest — presumably intended to
	// keep lowMark conservative; verify against callers.
	var lowMark time.Time
	if j.allReported {
		for s := 0; s < len(j.ins); s++ {
			sg := srcGroup{src: s, groupId: groupId}
			if lm := j.lowMarks[sg]; lowMark.IsZero() || lm.Before(lowMark) {
				lowMark = lm
			}
		}
	}

	// Check for cached specific points that can now be sent alone.
	if j.allReported {
		// Send all cached specific point that won't match anymore.
		// The buffer is time-ordered, so stop at the first point >= lowMark.
		var i int
		buf := j.specificGroupsBuffer[groupId]
		l := len(buf)
		for i = 0; i < l; i++ {
			st := buf[i].p.PointTime().Round(j.j.Tolerance)
			if st.Before(lowMark) {
				// Send point by itself since it won't get a match.
				j.sendSpecificPoint(buf[i], groupErrs)
			} else {
				break
			}
		}
		// Remove all sent points.
		j.specificGroupsBuffer[groupId] = buf[i:]
	}

	if len(p.p.PointDimensions().TagNames) > len(j.j.Dimensions) {
		// We have a specific point and three options:
		// 1. Find the cached match point and send both to group.
		// 2. Cache the specific point for later.
		// 3. Send the specific point alone if it is no longer possible that a match will arrive.

		// Search for a match.
		// Also purge any old match points.
		matches := j.matchGroupsBuffer[groupId]
		matched := false
		var i int
		l := len(matches)
		for i = 0; i < l; i++ {
			match := matches[i]
			pt := match.p.PointTime().Round(j.j.Tolerance)
			if pt.Equal(t) {
				// Option 1, send both points
				j.sendMatchPoint(p, match, groupErrs)
				matched = true
			}
			// Stop scanning once we reach a match point at/after lowMark;
			// i then marks the boundary used for truncation below.
			if !pt.Before(lowMark) {
				break
			}
		}
		if j.allReported {
			// Can't trust lowMark until all parents have reported.
			// Remove any unneeded match points.
			j.matchGroupsBuffer[groupId] = matches[i:]
		}

		// If the point didn't match that leaves us with options 2 and 3.
		if !matched {
			if j.allReported && t.Before(lowMark) {
				// Option 3
				// Send this specific point by itself since it won't get a match.
				j.sendSpecificPoint(p, groupErrs)
			} else {
				// Option 2
				// Cache this point for when its match arrives.
				j.specificGroupsBuffer[groupId] = append(j.specificGroupsBuffer[groupId], p)
			}
		}
	} else {
		// Cache match point.
		j.matchGroupsBuffer[groupId] = append(j.matchGroupsBuffer[groupId], p)

		// Send all specific points that match, to the group.
		// Again the buffer is time-ordered: stop at the first non-equal time.
		var i int
		buf := j.specificGroupsBuffer[groupId]
		l := len(buf)
		for i = 0; i < l; i++ {
			st := buf[i].p.PointTime().Round(j.j.Tolerance)
			if st.Equal(t) {
				j.sendMatchPoint(buf[i], p, groupErrs)
			} else {
				break
			}
		}
		// Remove all sent points
		j.specificGroupsBuffer[groupId] = buf[i:]
	}
}
// runGroupBy re-groups incoming data by the node's configured dimensions.
// Stream points are re-stamped individually; batches are split into one
// batch per resulting group, with a group's accumulated batch emitted when a
// batch with a new TMax arrives. The snapshot argument is unused.
func (g *GroupByNode) runGroupBy([]byte) error {
	dims := models.Dimensions{
		ByName: g.g.ByMeasurementFlag,
	}
	switch g.Wants() {
	case pipeline.StreamEdge:
		dims.TagNames = g.dimensions
		for pt, ok := g.ins[0].NextPoint(); ok; pt, ok = g.ins[0].NextPoint() {
			g.timer.Start()
			// Stamp the point with its new group and dimensions.
			pt = setGroupOnPoint(pt, g.allDimensions, dims)
			g.timer.Stop()
			for _, child := range g.outs {
				err := child.CollectPoint(pt)
				if err != nil {
					return err
				}
			}
		}
	default:
		// Batch edge: accumulate points into per-group batches and flush the
		// accumulated groups whenever TMax advances.
		// NOTE(review): groups buffered for the final TMax are only emitted
		// when a later batch arrives — presumably flushed elsewhere on
		// shutdown; confirm against the node's lifecycle.
		var lastTime time.Time
		groups := make(map[models.GroupID]*models.Batch)
		for b, ok := g.ins[0].NextBatch(); ok; b, ok = g.ins[0].NextBatch() {
			g.timer.Start()
			if !b.TMax.Equal(lastTime) {
				lastTime = b.TMax
				// Emit all groups
				for id, group := range groups {
					for _, child := range g.outs {
						err := child.CollectBatch(*group)
						if err != nil {
							return err
						}
					}
					// Remove from groups
					delete(groups, id)
				}
			}
			for _, p := range b.Points {
				// Group either by every tag on the point or by the
				// configured dimension list.
				if g.allDimensions {
					dims.TagNames = models.SortedKeys(p.Tags)
				} else {
					dims.TagNames = g.dimensions
				}
				groupID := models.ToGroupID(b.Name, p.Tags, dims)
				group, ok := groups[groupID]
				if !ok {
					// First point of this group: seed a batch whose tags are
					// restricted to the grouping dimensions.
					tags := make(map[string]string, len(dims.TagNames))
					for _, dim := range dims.TagNames {
						tags[dim] = p.Tags[dim]
					}
					group = &models.Batch{
						Name:   b.Name,
						Group:  groupID,
						TMax:   b.TMax,
						ByName: b.ByName,
						Tags:   tags,
					}
					groups[groupID] = group
				}
				group.Points = append(group.Points, p)
			}
			g.timer.Stop()
		}
	}
	return nil
}