Beispiel #1
0
// writeToShard writes points to every owner of a shard concurrently and reports whether the
// requested write consistency level was met.
//
// The number of acknowledgements required is 1 for ConsistencyLevelAny and ConsistencyLevelOne,
// a majority of the owners for ConsistencyLevelQuorum, and every owner otherwise. The function
// returns nil as soon as that many writes have succeeded; it does not wait for the remaining
// owners, so a slow replica cannot fail a write whose consistency level is already satisfied.
//
// Returns:
//   - nil when the required number of writes succeeded.
//   - ErrTimeout when w.WriteTimeout elapses before enough responses arrive.
//   - ErrWriteFailed when w.closing fires, or when nothing was written and no error was recorded.
//   - ErrPartialWrite when at least one, but fewer than the required number of, writes succeeded.
//   - an error wrapping the first failure seen when no write succeeded.
func (w *PointsWriter) writeToShard(shard *meta.ShardInfo, database, retentionPolicy string,
	consistency ConsistencyLevel, points []tsdb.Point) error {
	// The required number of writes to achieve the requested consistency level
	required := len(shard.OwnerIDs)
	switch consistency {
	case ConsistencyLevelAny, ConsistencyLevelOne:
		required = 1
	case ConsistencyLevelQuorum:
		required = required/2 + 1
	}

	// writeResult pairs the outcome of a write with the node it was sent to. Responses arrive in
	// completion order, not in OwnerIDs order, so the node ID has to travel with the error for
	// the failure log below to name the right node.
	type writeResult struct {
		nodeID uint64
		err    error
	}

	// Response channel for the shard writer goroutines. It is buffered with one slot per owner so
	// that every goroutine can deliver its result and exit even after this function has returned.
	ch := make(chan writeResult, len(shard.OwnerIDs))

	for _, nodeID := range shard.OwnerIDs {
		go func(shardID, nodeID uint64, points []tsdb.Point) {
			if w.MetaStore.NodeID() == nodeID {
				err := w.TSDBStore.WriteToShard(shardID, points)
				// If we've written to shard that should exist on the current node, but the store has
				// not actually created this shard, tell it to create it and retry the write
				if err == tsdb.ErrShardNotFound {
					err = w.TSDBStore.CreateShard(database, retentionPolicy, shardID)
					if err != nil {
						ch <- writeResult{nodeID: nodeID, err: err}
						return
					}
					err = w.TSDBStore.WriteToShard(shardID, points)
				}
				ch <- writeResult{nodeID: nodeID, err: err}
				return
			}

			err := w.ShardWriter.WriteShard(shardID, nodeID, points)
			if err != nil && tsdb.IsRetryable(err) {
				// The remote write failed so queue it via hinted handoff
				hherr := w.HintedHandoff.WriteShard(shardID, nodeID, points)

				// If the write consistency level is ANY, then a successful hinted handoff can
				// be considered a successful write so send nil to the response channel
				// otherwise, let the original error propagate to the response channel
				if hherr == nil && consistency == ConsistencyLevelAny {
					ch <- writeResult{nodeID: nodeID}
					return
				}
			}
			ch <- writeResult{nodeID: nodeID, err: err}
		}(shard.ID, nodeID, points)
	}

	var wrote int
	timeout := time.After(w.WriteTimeout)
	var writeError error
	// Expect at most one response per owner.
	for range shard.OwnerIDs {
		select {
		case <-w.closing:
			return ErrWriteFailed
		case <-timeout:
			// return timeout error to caller
			return ErrTimeout
		case result := <-ch:
			// If the write returned an error, continue to the next response
			if result.err != nil {
				w.Logger.Printf("write failed for shard %d on node %d: %v", shard.ID, result.nodeID, result.err)

				// Keep track of the first error we see to return back to the client
				if writeError == nil {
					writeError = result.err
				}
				continue
			}

			wrote++

			// The consistency level is satisfied; there is no reason to wait on the
			// remaining owners (and risk timing out on a slow one).
			if wrote >= required {
				return nil
			}
		}
	}

	// Only reachable with wrote >= required when the shard has no owners and required is 0.
	if wrote >= required {
		return nil
	}

	if wrote > 0 {
		return ErrPartialWrite
	}

	if writeError != nil {
		return fmt.Errorf("write failed: %v", writeError)
	}

	return ErrWriteFailed
}
Beispiel #2
0
// Process attempts to deliver the writes queued for every node, using one goroutine per queue,
// and blocks until all of those goroutines have finished.
//
// p.mu is read-locked for the whole call. Because Process waits for every worker before
// returning, no worker touches its queue after the lock has been released, even when another
// worker fails early.
//
// For each queue the worker repeatedly reads the current block, decodes it, sends it to the
// node, advances the queue, and then sleeps as directed by the rate limiter. A worker stops when:
//   - q.Current returns an error (reported as success; presumably the queue is empty — confirm
//     against the queue implementation),
//   - the block cannot be unmarshalled (error reported),
//   - the remote write fails with a retryable error (logged, reported as success so the block
//     stays at the head of the queue for a later pass), or
//   - q.Advance fails (error reported).
//
// A remote write that fails with a non-retryable error is not retried: the queue is advanced
// past it exactly as if it had succeeded.
//
// Process returns the first non-nil error reported by any worker, or nil.
func (p *Processor) Process() error {
	p.mu.RLock()
	defer p.mu.RUnlock()

	// drain sends queued writes for a single node until it has to stop, and returns the error
	// (if any) that should be surfaced to the caller of Process.
	drain := func(nodeID uint64, q *queue) error {
		// Log how many writes we successfully sent at the end
		var sent int
		start := time.Now()
		defer func() {
			if sent > 0 {
				p.Logger.Printf("%d queued writes sent to node %d in %s", sent, nodeID, time.Since(start))
			}
		}()

		limiter := NewRateLimiter(p.retryRateLimit)
		for {
			// Get the current block from the queue
			buf, err := q.Current()
			if err != nil {
				// Nothing (more) can be read from this queue; not treated as a failure.
				return nil
			}

			// unmarshal the byte slice back to shard ID and points
			shardID, points, err := p.unmarshalWrite(buf)
			if err != nil {
				// TODO: If we ever get an error here, we should probably drop the
				// write and let anti-entropy resolve it.  This would be an unrecoverable
				// error and could block the queue indefinitely.
				return err
			}

			// Try to send the write to the node
			if err := p.writer.WriteShard(shardID, nodeID, points); err != nil && tsdb.IsRetryable(err) {
				// Leave the block at the head of the queue so a later pass retries it.
				p.Logger.Printf("remote write failed: %v", err)
				return nil
			}

			// If we get here, the write succeeded (or failed in a way that is not worth
			// retrying) so advance the queue to the next item
			if err := q.Advance(); err != nil {
				return err
			}

			sent++

			// Update how many bytes we've sent
			limiter.Update(len(buf))

			// Block to maintain the throughput rate
			time.Sleep(limiter.Delay())
		}
	}

	// One slot per queue: every worker sends exactly one result.
	res := make(chan error, len(p.queues))
	for nodeID, q := range p.queues {
		go func(nodeID uint64, q *queue) {
			res <- drain(nodeID, q)
		}(nodeID, q)
	}

	// Wait for every worker, remembering only the first failure. Returning on the first error
	// would release the read lock while the other workers are still using their queues.
	var firstErr error
	for range p.queues {
		if err := <-res; err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}