// writeToShard writes points to a shard and ensures a write consistency level has been met. If the write
// partially succeeds, ErrPartialWrite is returned.
func (w *PointsWriter) writeToShard(shard *meta.ShardInfo, database, retentionPolicy string,
	consistency ConsistencyLevel, points []tsdb.Point) error {
	// The required number of writes to achieve the requested consistency level
	required := len(shard.OwnerIDs)
	switch consistency {
	case ConsistencyLevelAny, ConsistencyLevelOne:
		required = 1
	case ConsistencyLevelQuorum:
		required = required/2 + 1
	}

	// response channel for each shard writer goroutine
	ch := make(chan error, len(shard.OwnerIDs))

	for _, nodeID := range shard.OwnerIDs {
		go func(shardID, nodeID uint64, points []tsdb.Point) {
			if w.MetaStore.NodeID() == nodeID {
				err := w.TSDBStore.WriteToShard(shardID, points)
				// If we've written to a shard that should exist on the current node, but the store has
				// not actually created this shard, tell it to create it and retry the write
				if err == tsdb.ErrShardNotFound {
					err = w.TSDBStore.CreateShard(database, retentionPolicy, shardID)
					if err != nil {
						ch <- err
						return
					}
					err = w.TSDBStore.WriteToShard(shardID, points)
				}
				ch <- err
				return
			}

			err := w.ShardWriter.WriteShard(shardID, nodeID, points)
			if err != nil && tsdb.IsRetryable(err) {
				// The remote write failed so queue it via hinted handoff
				hherr := w.HintedHandoff.WriteShard(shardID, nodeID, points)

				// If the write consistency level is ANY, then a successful hinted handoff can
				// be considered a successful write, so send nil to the response channel;
				// otherwise, let the original error propagate to the response channel
				if hherr == nil && consistency == ConsistencyLevelAny {
					ch <- nil
					return
				}
			}
			ch <- err
		}(shard.ID, nodeID, points)
	}

	var wrote int
	timeout := time.After(w.WriteTimeout)
	var writeError error
	for _, nodeID := range shard.OwnerIDs {
		select {
		case <-w.closing:
			return ErrWriteFailed
		case <-timeout:
			// return timeout error to caller
			return ErrTimeout
		case err := <-ch:
			// If the write returned an error, continue to the next response
			if err != nil {
				w.Logger.Printf("write failed for shard %d on node %d: %v", shard.ID, nodeID, err)

				// Keep track of the first error we see to return back to the client
				if writeError == nil {
					writeError = err
				}
				continue
			}

			wrote++
		}
	}

	// We wrote the required consistency level
	if wrote >= required {
		return nil
	}

	if wrote > 0 {
		return ErrPartialWrite
	}

	if writeError != nil {
		return fmt.Errorf("write failed: %v", writeError)
	}

	return ErrWriteFailed
}

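// Process drains each node's hinted-handoff queue, sending any queued writes
// to their destination node at a rate bounded by the configured retry rate limit.
// It returns the first error encountered while unmarshaling or advancing a queue.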
func (p *Processor) Process() error {
	p.mu.RLock()
	defer p.mu.RUnlock()

	res := make(chan error, len(p.queues))
	for nodeID, q := range p.queues {
		go func(nodeID uint64, q *queue) {
			// Log how many writes we successfully sent at the end
			var sent int
			start := time.Now()
			defer func(start time.Time) {
				if sent > 0 {
					p.Logger.Printf("%d queued writes sent to node %d in %s", sent, nodeID, time.Since(start))
				}
			}(start)

			limiter := NewRateLimiter(p.retryRateLimit)
			for {
				// Get the current block from the queue
				buf, err := q.Current()
				if err != nil {
					res <- nil
					break
				}

				// unmarshal the byte slice back to shard ID and points
				shardID, points, err := p.unmarshalWrite(buf)
				if err != nil {
					// TODO: If we ever get an error here, we should probably drop the
					// write and let anti-entropy resolve it. This would be an unrecoverable
					// error and could block the queue indefinitely.
					res <- err
					return
				}

				// Try to send the write to the node
				if err := p.writer.WriteShard(shardID, nodeID, points); err != nil && tsdb.IsRetryable(err) {
					p.Logger.Printf("remote write failed: %v", err)
					res <- nil
					break
				}

				// If we get here, the write succeeded so advance the queue to the next item
				if err := q.Advance(); err != nil {
					res <- err
					return
				}

				sent++

				// Update how many bytes we've sent
				limiter.Update(len(buf))

				// Block to maintain the throughput rate
				time.Sleep(limiter.Delay())
			}
		}(nodeID, q)
	}

	for range p.queues {
		err := <-res
		if err != nil {
			return err
		}
	}
	return nil
}