// writeToShard writes points to a shard and ensures a write consistency level has been met. If the write
// partially succeeds, ErrPartialWrite is returned.
func (w *PointsWriter) writeToShard(shard *meta.ShardInfo, database, retentionPolicy string,
	consistency ConsistencyLevel, points []models.Point) error {
	// The required number of writes to achieve the requested consistency level
	required := len(shard.Owners)
	switch consistency {
	case ConsistencyLevelAny, ConsistencyLevelOne:
		required = 1
	case ConsistencyLevelQuorum:
		required = required/2 + 1
	}

	// response channel for each shard writer goroutine
	type AsyncWriteResult struct {
		Owner meta.ShardOwner
		Err   error
	}
	ch := make(chan *AsyncWriteResult, len(shard.Owners))

	for _, owner := range shard.Owners {
		go func(shardID uint64, owner meta.ShardOwner, points []models.Point) {
			if w.MetaStore.NodeID() == owner.NodeID {
				w.statMap.Add(statPointWriteReqLocal, int64(len(points)))

				err := w.TSDBStore.WriteToShard(shardID, points)
				// If we've written to a shard that should exist on the current node, but the store has
				// not actually created this shard, tell it to create it and retry the write
				if err == tsdb.ErrShardNotFound {
					err = w.TSDBStore.CreateShard(database, retentionPolicy, shardID)
					if err != nil {
						ch <- &AsyncWriteResult{owner, err}
						return
					}
					err = w.TSDBStore.WriteToShard(shardID, points)
				}
				ch <- &AsyncWriteResult{owner, err}
				return
			}

			w.statMap.Add(statPointWriteReqRemote, int64(len(points)))
			err := w.ShardWriter.WriteShard(shardID, owner.NodeID, points)
			if err != nil && tsdb.IsRetryable(err) {
				// The remote write failed so queue it via hinted handoff
				w.statMap.Add(statWritePointReqHH, int64(len(points)))
				hherr := w.HintedHandoff.WriteShard(shardID, owner.NodeID, points)

				// If the write consistency level is ANY, then a successful hinted handoff can
				// be considered a successful write so send nil to the response channel;
				// otherwise, let the original error propagate to the response channel
				if hherr == nil && consistency == ConsistencyLevelAny {
					ch <- &AsyncWriteResult{owner, nil}
					return
				}
			}
			ch <- &AsyncWriteResult{owner, err}
		}(shard.ID, owner, points)
	}

	var wrote int
	timeout := time.After(w.WriteTimeout)
	var writeError error
	for range shard.Owners {
		select {
		case <-w.closing:
			return ErrWriteFailed
		case <-timeout:
			w.statMap.Add(statWriteTimeout, 1)
			// return timeout error to caller
			return ErrTimeout
		case result := <-ch:
			// If the write returned an error, continue to the next response
			if result.Err != nil {
				w.statMap.Add(statWriteErr, 1)
				w.Logger.Printf("write failed for shard %d on node %d: %v", shard.ID, result.Owner.NodeID, result.Err)

				// Keep track of the first error we see to return back to the client
				if writeError == nil {
					writeError = result.Err
				}
				continue
			}

			wrote++

			// We wrote the required consistency level
			if wrote >= required {
				w.statMap.Add(statWriteOK, 1)
				return nil
			}
		}
	}

	if wrote > 0 {
		w.statMap.Add(statWritePartial, 1)
		return ErrPartialWrite
	}

	if writeError != nil {
		return fmt.Errorf("write failed: %v", writeError)
	}

	return ErrWriteFailed
}
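// The snippet below is a minimal, self-contained sketch (not part of the code above)
// illustrating how the consistency switch at the top of writeToShard maps a consistency
// level to the number of owner acknowledgements required before the write is considered
// successful. The requiredWrites helper and the string level names are hypothetical and
// exist only for illustration; the real code uses the ConsistencyLevel constants.
package main

import "fmt"

// requiredWrites mirrors the switch in writeToShard: ANY and ONE need a single
// acknowledgement, QUORUM needs a majority, and anything else (ALL) needs every owner.
func requiredWrites(owners int, level string) int {
	required := owners
	switch level {
	case "any", "one":
		required = 1
	case "quorum":
		required = owners/2 + 1
	}
	return required
}

func main() {
	// With 3 shard owners: one => 1, quorum => 2, all => 3.
	for _, level := range []string{"one", "quorum", "all"} {
		fmt.Printf("%-6s => %d of 3 owners must acknowledge\n", level, requiredWrites(3, level))
	}
}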
func (p *Processor) Process() error {
	p.mu.RLock()
	defer p.mu.RUnlock()

	activeQueues, err := p.activeQueues()
	if err != nil {
		return err
	}

	res := make(chan error, len(activeQueues))
	for nodeID, q := range activeQueues {
		go func(nodeID uint64, q *queue) {
			// Log how many writes we successfully sent at the end
			var sent int
			start := time.Now()
			defer func(start time.Time) {
				if sent > 0 {
					p.Logger.Printf("%d queued writes sent to node %d in %s", sent, nodeID, time.Since(start))
				}
			}(start)

			limiter := NewRateLimiter(p.retryRateLimit)
			for {
				// Get the current block from the queue
				buf, err := q.Current()
				if err != nil {
					if err != io.EOF {
						p.nodeStatMaps[nodeID].Add(currentErr, 1)
					}
					res <- nil
					break
				}

				// unmarshal the byte slice back to shard ID and points
				shardID, points, err := p.unmarshalWrite(buf)
				if err != nil {
					p.nodeStatMaps[nodeID].Add(unmarshalErr, 1)
					p.Logger.Printf("unmarshal write failed: %v", err)
					if err := q.Advance(); err != nil {
						p.nodeStatMaps[nodeID].Add(advanceErr, 1)
						res <- err
					}

					// Skip and try the next block.
					continue
				}

				// Try to send the write to the node
				if err := p.writer.WriteShard(shardID, nodeID, points); err != nil && tsdb.IsRetryable(err) {
					p.nodeStatMaps[nodeID].Add(writeErr, 1)
					p.Logger.Printf("remote write failed: %v", err)
					res <- nil
					break
				}
				p.updateShardStats(shardID, pointsWrite, int64(len(points)))
				p.nodeStatMaps[nodeID].Add(pointsWrite, int64(len(points)))

				// If we get here, the write succeeded so advance the queue to the next item
				if err := q.Advance(); err != nil {
					p.nodeStatMaps[nodeID].Add(advanceErr, 1)
					res <- err
					return
				}

				sent++

				// Update how many bytes we've sent
				limiter.Update(len(buf))
				p.updateShardStats(shardID, bytesWrite, int64(len(buf)))
				p.nodeStatMaps[nodeID].Add(bytesWrite, int64(len(buf)))

				// Block to maintain the throughput rate
				time.Sleep(limiter.Delay())
			}
		}(nodeID, q)
	}

	for range activeQueues {
		err := <-res
		if err != nil {
			return err
		}
	}

	return nil
}
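// Both Process variants assume a rate limiter exposing Update(bytes) and Delay(), created
// via NewRateLimiter(p.retryRateLimit). The sketch below is one plausible implementation of
// that interface, written only to illustrate how the throughput cap could work; it is not
// the processor's actual limiter, and the type, function, and package names are hypothetical.
// It delays callers so that roughly `limit` bytes are sent per second on average.
package hhsketch

import "time"

type byteRateLimiter struct {
	limit int64     // target bytes per second; <= 0 disables limiting
	total int64     // bytes recorded so far
	start time.Time // when measurement began
}

func newByteRateLimiter(limit int64) *byteRateLimiter {
	return &byteRateLimiter{limit: limit, start: time.Now()}
}

// Update records that n more bytes were sent.
func (r *byteRateLimiter) Update(n int) {
	r.total += int64(n)
}

// Delay returns how long the caller should sleep so that the average
// send rate stays at or below the configured limit.
func (r *byteRateLimiter) Delay() time.Duration {
	if r.limit <= 0 {
		return 0
	}
	// How long sending r.total bytes should have taken at r.limit bytes/sec.
	expected := time.Duration(float64(r.total) / float64(r.limit) * float64(time.Second))
	if d := expected - time.Since(r.start); d > 0 {
		return d
	}
	return 0
}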
func (p *Processor) Process() error {
	p.mu.RLock()
	defer p.mu.RUnlock()

	res := make(chan error, len(p.queues))
	for nodeID, q := range p.queues {
		go func(nodeID uint64, q *queue) {
			// Log how many writes we successfully sent at the end
			var sent int
			start := time.Now()
			defer func(start time.Time) {
				if sent > 0 {
					p.Logger.Printf("%d queued writes sent to node %d in %s", sent, nodeID, time.Since(start))
				}
			}(start)

			limiter := NewRateLimiter(p.retryRateLimit)
			for {
				// Get the current block from the queue
				buf, err := q.Current()
				if err != nil {
					res <- nil
					break
				}

				// unmarshal the byte slice back to shard ID and points
				shardID, points, err := p.unmarshalWrite(buf)
				if err != nil {
					// TODO: If we ever get an error here, we should probably drop the
					// write and let anti-entropy resolve it. This would be an unrecoverable
					// error and could block the queue indefinitely.
					res <- err
					return
				}

				// Try to send the write to the node
				if err := p.writer.WriteShard(shardID, nodeID, points); err != nil && tsdb.IsRetryable(err) {
					p.Logger.Printf("remote write failed: %v", err)
					res <- nil
					break
				}

				// If we get here, the write succeeded so advance the queue to the next item
				if err := q.Advance(); err != nil {
					res <- err
					return
				}

				sent++

				// Update how many bytes we've sent
				limiter.Update(len(buf))

				// Block to maintain the throughput rate
				time.Sleep(limiter.Delay())
			}
		}(nodeID, q)
	}

	for range p.queues {
		err := <-res
		if err != nil {
			return err
		}
	}

	return nil
}
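// A hedged sketch of how a hinted-handoff service might drive Process: the processor is
// re-run on a fixed interval until the service is signalled to close. The retryService
// type, its fields, and the interval name are hypothetical and only illustrate one way the
// retry loop could be wired up; the real service wiring is not shown in the code above.
package hhsketch

import (
	"log"
	"time"
)

type processor interface {
	Process() error
}

type retryService struct {
	processor     processor
	retryInterval time.Duration
	closing       chan struct{}
	logger        *log.Logger
}

// retryWrites periodically drains the hinted-handoff queues by calling Process
// until the service is told to close.
func (s *retryService) retryWrites() {
	ticker := time.NewTicker(s.retryInterval)
	defer ticker.Stop()

	for {
		select {
		case <-s.closing:
			return
		case <-ticker.C:
			if err := s.processor.Process(); err != nil {
				s.logger.Printf("hinted handoff processing failed: %v", err)
			}
		}
	}
}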