// writeToShard writes points to every owner of the shard and waits until the requested
// write consistency level has been met.
//
// One goroutine is started per shard owner. The owner whose node ID matches
// w.MetaStore.NodeID() is written through w.TSDBStore (creating the shard and retrying
// once when the store reports tsdb.ErrShardNotFound). Every other owner is written
// through w.ShardWriter, and the write is queued via w.HintedHandoff when that remote
// write fails with a retryable error.
//
// Return values:
//   - nil as soon as the required number of owners have acknowledged the write;
//   - ErrTimeout if w.WriteTimeout elapses before that happens;
//   - ErrWriteFailed if w.closing is signalled, or if the shard has no owners;
//   - ErrPartialWrite if at least one, but fewer than the required number of, owners succeeded;
//   - otherwise an error describing the first failure that was observed.
func (w *PointsWriter) writeToShard(shard *meta.ShardInfo, database, retentionPolicy string,
	consistency ConsistencyLevel, points []models.Point) error {
	// The required number of writes to achieve the requested consistency level.
	// Levels not listed in the switch keep the default of every owner.
	required := len(shard.Owners)
	switch consistency {
	case ConsistencyLevelAny, ConsistencyLevelOne:
		required = 1
	case ConsistencyLevelQuorum:
		required = required/2 + 1
	}

	// Response channel for the shard writer goroutines. It is buffered with one slot
	// per owner so that every goroutine can deliver its single result and exit even
	// when this function has already returned.
	type AsyncWriteResult struct {
		Owner meta.ShardOwner
		Err   error
	}
	ch := make(chan *AsyncWriteResult, len(shard.Owners))

	for _, owner := range shard.Owners {
		go func(shardID uint64, owner meta.ShardOwner, points []models.Point) {
			if w.MetaStore.NodeID() == owner.NodeID {
				w.statMap.Add(statPointWriteReqLocal, int64(len(points)))

				err := w.TSDBStore.WriteToShard(shardID, points)
				// If we've written to shard that should exist on the current node, but the store has
				// not actually created this shard, tell it to create it and retry the write
				if err == tsdb.ErrShardNotFound {
					err = w.TSDBStore.CreateShard(database, retentionPolicy, shardID)
					if err != nil {
						ch <- &AsyncWriteResult{owner, err}
						return
					}
					err = w.TSDBStore.WriteToShard(shardID, points)
				}
				ch <- &AsyncWriteResult{owner, err}
				return
			}

			w.statMap.Add(statPointWriteReqRemote, int64(len(points)))
			err := w.ShardWriter.WriteShard(shardID, owner.NodeID, points)
			if err != nil && tsdb.IsRetryable(err) {
				// The remote write failed so queue it via hinted handoff
				w.statMap.Add(statWritePointReqHH, int64(len(points)))
				hherr := w.HintedHandoff.WriteShard(shardID, owner.NodeID, points)

				// If the write consistency level is ANY, then a successful hinted handoff can
				// be considered a successful write so send nil to the response channel
				// otherwise, let the original error propagate to the response channel.
				// NOTE(review): a non-nil hherr is discarded here; only the original
				// remote write error is reported to the collector below.
				if hherr == nil && consistency == ConsistencyLevelAny {
					ch <- &AsyncWriteResult{owner, nil}
					return
				}
			}
			ch <- &AsyncWriteResult{owner, err}

		}(shard.ID, owner, points)
	}

	var wrote int
	// Use an explicit timer rather than time.After so that it can be stopped as soon
	// as this function returns instead of lingering until the timeout expires.
	timeout := time.NewTimer(w.WriteTimeout)
	defer timeout.Stop()
	var writeError error
	for range shard.Owners {
		select {
		case <-w.closing:
			return ErrWriteFailed
		case <-timeout.C:
			w.statMap.Add(statWriteTimeout, 1)
			// return timeout error to caller
			return ErrTimeout
		case result := <-ch:
			// If the write returned an error, continue to the next response
			if result.Err != nil {
				w.statMap.Add(statWriteErr, 1)
				w.Logger.Printf("write failed for shard %d on node %d: %v", shard.ID, result.Owner.NodeID, result.Err)

				// Keep track of the first error we see to return back to the client
				if writeError == nil {
					writeError = result.Err
				}
				continue
			}

			wrote++

			// We wrote the required consistency level
			if wrote >= required {
				w.statMap.Add(statWriteOK, 1)
				return nil
			}
		}
	}

	// Every owner has responded but the consistency level was not reached.
	if wrote > 0 {
		w.statMap.Add(statWritePartial, 1)
		return ErrPartialWrite
	}

	if writeError != nil {
		return fmt.Errorf("write failed: %v", writeError)
	}

	// Only reachable when the shard has no owners at all.
	return ErrWriteFailed
}
Exemple #2
0
// Process sends the writes queued for every active node, using one goroutine per
// queue, and waits for the goroutines to report back.
//
// Each goroutine repeatedly reads the current block of its queue, unmarshals it into
// a shard ID and points, writes them to the node through p.writer and advances the
// queue, throttled by a rate limiter built from p.retryRateLimit. A goroutine stops
// when q.Current returns an error, when the remote write fails with a retryable
// error, or when advancing the queue fails. Blocks that cannot be unmarshalled are
// logged, counted and skipped.
//
// The read lock on p.mu is held for the whole call. Process returns the error from
// p.activeQueues if that fails, otherwise the first non-nil result reported by a
// goroutine (only queue-advance failures are reported as errors), or nil.
func (p *Processor) Process() error {
	p.mu.RLock()
	defer p.mu.RUnlock()

	activeQueues, err := p.activeQueues()
	if err != nil {
		return err
	}

	// One slot per queue: every goroutine must send exactly one result, which lets
	// it exit without blocking even if Process has already returned on an error.
	res := make(chan error, len(activeQueues))
	for nodeID, q := range activeQueues {
		go func(nodeID uint64, q *queue) {

			// Log how many writes we successfully sent at the end
			var sent int
			start := time.Now()
			defer func(start time.Time) {
				if sent > 0 {
					p.Logger.Printf("%d queued writes sent to node %d in %s", sent, nodeID, time.Since(start))
				}
			}(start)

			limiter := NewRateLimiter(p.retryRateLimit)
			for {
				// Get the current block from the queue
				buf, err := q.Current()
				if err != nil {
					// io.EOF is not counted as an error; anything else is.
					if err != io.EOF {
						p.nodeStatMaps[nodeID].Add(currentErr, 1)
					}
					res <- nil
					return
				}

				// unmarshal the byte slice back to shard ID and points
				shardID, points, err := p.unmarshalWrite(buf)
				if err != nil {
					p.nodeStatMaps[nodeID].Add(unmarshalErr, 1)
					p.Logger.Printf("unmarshal write failed: %v", err)
					if err := q.Advance(); err != nil {
						p.nodeStatMaps[nodeID].Add(advanceErr, 1)
						// Stop here: continuing would send a second result on res for
						// this queue, which the collector does not expect and which
						// can block this goroutine forever once the buffer is full.
						res <- err
						return
					}

					// Skip and try the next block.
					continue
				}

				// Try to send the write to the node
				if err := p.writer.WriteShard(shardID, nodeID, points); err != nil && tsdb.IsRetryable(err) {
					p.nodeStatMaps[nodeID].Add(writeErr, 1)
					p.Logger.Printf("remote write failed: %v", err)
					res <- nil
					return
				}
				p.updateShardStats(shardID, pointsWrite, int64(len(points)))
				p.nodeStatMaps[nodeID].Add(pointsWrite, int64(len(points)))

				// If we get here, the write succeeded so advance the queue to the next item
				if err := q.Advance(); err != nil {
					p.nodeStatMaps[nodeID].Add(advanceErr, 1)
					res <- err
					return
				}

				sent++

				// Update how many bytes we've sent
				limiter.Update(len(buf))
				p.updateShardStats(shardID, bytesWrite, int64(len(buf)))
				p.nodeStatMaps[nodeID].Add(bytesWrite, int64(len(buf)))

				// Block to maintain the throughput rate
				time.Sleep(limiter.Delay())

			}
		}(nodeID, q)
	}

	// Collect one result per queue, returning on the first error.
	for range activeQueues {
		if err := <-res; err != nil {
			return err
		}
	}
	return nil
}
Exemple #3
0
// Process forwards the writes held in every queue of p.queues to the node the queue
// is keyed by, running one worker goroutine per queue while holding the read lock on
// p.mu. It returns the first non-nil error reported by a worker, or nil once every
// worker has reported without error.
func (p *Processor) Process() error {
	p.mu.RLock()
	defer p.mu.RUnlock()

	// One slot per queue so a worker can always report its single outcome.
	results := make(chan error, len(p.queues))

	// drain sends the blocks of one queue to its node until it has to stop, then
	// reports exactly one outcome on results.
	drain := func(nodeID uint64, q *queue) {
		// Number of writes delivered, logged when the worker exits.
		delivered := 0
		began := time.Now()
		defer func() {
			if delivered > 0 {
				p.Logger.Printf("%d queued writes sent to node %d in %s", delivered, nodeID, time.Since(began))
			}
		}()

		throttle := NewRateLimiter(p.retryRateLimit)
		for {
			// Fetch the block at the head of the queue; any error ends the
			// worker without reporting a failure.
			block, err := q.Current()
			if err != nil {
				results <- nil
				return
			}

			// Decode the block into its shard ID and points.
			// TODO: an error here is likely unrecoverable and could block the queue
			// indefinitely; the write should probably be dropped and left to
			// anti-entropy to resolve.
			shardID, points, err := p.unmarshalWrite(block)
			if err != nil {
				results <- err
				return
			}

			// Deliver the write; a retryable failure ends this pass without
			// advancing the queue.
			if err := p.writer.WriteShard(shardID, nodeID, points); err != nil && tsdb.IsRetryable(err) {
				p.Logger.Printf("remote write failed: %v", err)
				results <- nil
				return
			}

			// The write was not rejected as retryable, so move on to the next block.
			if err := q.Advance(); err != nil {
				results <- err
				return
			}

			delivered++

			// Account for the bytes just sent and pause to hold the throughput rate.
			throttle.Update(len(block))
			time.Sleep(throttle.Delay())
		}
	}

	for nodeID, q := range p.queues {
		go drain(nodeID, q)
	}

	// Wait for one outcome per queue, bailing out on the first error.
	for range p.queues {
		if err := <-results; err != nil {
			return err
		}
	}
	return nil
}