func (self *log) appendRequest(request *protocol.Request, shardId uint32) error { bytes, err := request.Encode() if err != nil { return err } // every request is preceded with the length, shard id and the request number hdr := &entryHeader{ shardId: shardId, requestNumber: request.GetRequestNumber(), length: uint32(len(bytes)), } writtenHdrBytes, err := hdr.Write(self.file) if err != nil { logger.Error("Error while writing header: %s", err) return err } written, err := self.file.Write(bytes) if err != nil { logger.Error("Error while writing request: %s", err) return err } if written < len(bytes) { err = fmt.Errorf("Couldn't write entire request") logger.Error("Error while writing request: %s", err) return err } self.fileSize += uint64(writtenHdrBytes + written) return nil }
func (self *ClusterServer) Write(request *protocol.Request) error { responseChan := make(chan *protocol.Response, 1) rc := NewResponseChannelWrapper(responseChan) err := self.connection.MakeRequest(request, rc) if err != nil { return err } log.Debug("Waiting for response to %d", request.GetRequestNumber()) response := <-responseChan if response.ErrorMessage != nil { return errors.New(*response.ErrorMessage) } return nil }
// This method never blocks. It'll buffer writes until they fill the buffer then drop the on the // floor and let the background goroutine replay from the WAL func (self *WriteBuffer) Write(request *protocol.Request) { self.shardLastRequestNumber[request.GetShardId()] = request.GetRequestNumber() select { case self.writes <- request: log.Debug("Buffering %d:%d for %s", request.GetRequestNumber(), request.GetShardId(), self.writerInfo) return default: select { case self.stoppedWrites <- *request.RequestNumber: return default: return } } }
func (self *WriteBuffer) write(request *protocol.Request) { attempts := 0 for { self.shardIds[*request.ShardId] = true err := self.writer.Write(request) if err == nil { requestNumber := request.RequestNumber if requestNumber == nil { return } self.shardCommitedRequestNumber[request.GetShardId()] = *requestNumber log.Debug("Commiting %d:%d for %s", request.GetRequestNumber(), request.GetShardId(), self.writerInfo) self.wal.Commit(*requestNumber, self.serverId) return } if attempts%100 == 0 { log.Error("%s: WriteBuffer: error on write to server %d: %s", self.writerInfo, self.serverId, err) } attempts += 1 // backoff happens in the writer, just sleep for a small fixed amount of time before retrying time.Sleep(time.Millisecond * 100) } }
// replayAndRecover catches the writer up after writes were dropped from the
// buffer. It drains what is currently buffered, replays requests from the WAL
// starting at the first drained request, and repeats while further dropped
// writes are signalled on stoppedWrites.
//
// NOTE(review): the missedRequest parameter is not read anywhere in this
// body; the replay start point is taken from the buffered requests instead.
func (self *WriteBuffer) replayAndRecover(missedRequest uint32) {
	// req tracks the replay position: first the oldest drained request, then
	// (via the callback below) the most recently replayed one.
	var req *protocol.Request

	// empty out the buffer before the replay so new writes can buffer while we're replaying
	channelLen := len(self.writes)
	// This is the first run through the replay. Start from the start of the write queue.
	// Only the first drained request is kept; the others are discarded here.
	for i := 0; i < channelLen; i++ {
		r := <-self.writes
		if req == nil {
			req = r
		}
	}
	if req == nil {
		// Nothing was buffered, so there is no request number to replay from.
		log.Error("%s: REPLAY: emptied channel, but no request set", self.writerInfo)
		return
	}
	log.Debug("%s: REPLAY: Emptied out channel", self.writerInfo)

	// Snapshot the keys of self.shardIds to pass to the WAL.
	shardIds := make([]uint32, 0)
	for shardId := range self.shardIds {
		shardIds = append(shardIds, shardId)
	}

	// while we're behind keep replaying from WAL
	for {
		log.Info("%s: REPLAY: Replaying dropped requests...", self.writerInfo)

		log.Debug("%s: REPLAY: from request %d. Shards: %v", self.writerInfo, req.GetRequestNumber(), shardIds)
		// NOTE(review): req.RequestNumber is dereferenced without a nil check,
		// and whatever RecoverServerFromRequestNumber returns (if anything) is
		// not inspected here.
		self.wal.RecoverServerFromRequestNumber(*req.RequestNumber, shardIds, func(request *protocol.Request, shardId uint32) error {
			log.Debug("%s: REPLAY: writing request number: %d", self.writerInfo, request.GetRequestNumber())
			// Remember the latest replayed request; the drain loop below
			// compares against it.
			req = request
			request.ShardId = &shardId
			self.write(request)
			return nil
		})

		log.Info("%s: REPLAY: Emptying out reqeusts from buffer that we've already replayed", self.writerInfo)
		// Discard buffered requests until reaching the one whose request number
		// matches the last replayed request, or until the buffer is empty.
		// NOTE(review): both RequestNumber pointers are dereferenced without
		// nil checks.
	RequestLoop:
		for {
			select {
			case newReq := <-self.writes:
				if *newReq.RequestNumber == *req.RequestNumber {
					break RequestLoop
				}
			default:
				log.Error("%s: REPLAY: Got to the end of the write buffer without getting to the last written request.", self.writerInfo)
				break RequestLoop
			}
		}

		log.Info("%s: REPLAY: done.", self.writerInfo)

		// now make sure that no new writes were dropped. If so, do the replay again from this place.
		// The receive is non-blocking: with no pending signal the method returns.
		select {
		case <-self.stoppedWrites:
			log.Info("%s: REPLAY: Buffer backed up while replaying, going again.", self.writerInfo)
			continue
		default:
			return
		}
	}
}