func (self *log) appendRequest(request *protocol.Request, shardId uint32) error {
	bytes, err := request.Encode()
	if err != nil {
		return err
	}
	// every request is preceded by a header carrying its shard id, request
	// number, and length
	hdr := &entryHeader{
		shardId:       shardId,
		requestNumber: request.GetRequestNumber(),
		length:        uint32(len(bytes)),
	}
	writtenHdrBytes, err := hdr.Write(self.file)
	if err != nil {
		logger.Error("Error while writing header: %s", err)
		return err
	}
	written, err := self.file.Write(bytes)
	if err != nil {
		logger.Error("Error while writing request: %s", err)
		return err
	}
	if written < len(bytes) {
		err = fmt.Errorf("Couldn't write entire request")
		logger.Error("Error while writing request: %s", err)
		return err
	}
	self.fileSize += uint64(writtenHdrBytes + written)
	return nil
}
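// For reference, a minimal sketch of what the entryHeader serialization above
// might look like: a fixed-size header of three little-endian uint32 fields.
// The field order, endianness, and the name entryHeaderSketch are assumptions
// for illustration only; the real entryHeader lives elsewhere in this package.
// Assumes the bytes, encoding/binary, and io imports.
type entryHeaderSketch struct {
	shardId       uint32
	requestNumber uint32
	length        uint32
}

func (h *entryHeaderSketch) Write(w io.Writer) (int, error) {
	buf := bytes.NewBuffer(make([]byte, 0, 12))
	for _, field := range []uint32{h.shardId, h.requestNumber, h.length} {
		// writes to a bytes.Buffer never fail, so the error is ignored
		binary.Write(buf, binary.LittleEndian, field)
	}
	// returning the byte count lets the caller account for fileSize, as
	// appendRequest does above
	return w.Write(buf.Bytes())
}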
// Makes a request to the server. If the response channel r is not nil, a
// response from the server with a matching request.Id is expected. The
// REQUEST_RETRY_ATTEMPTS constant of 3 and the RECONNECT_RETRY_WAIT of 100ms
// mean that an attempt to make a request to a downed server will take 300ms
// to time out.
func (self *ProtobufClient) MakeRequest(request *protocol.Request, r cluster.ResponseChannel) error {
	if request.Id == nil {
		id := atomic.AddUint32(&self.lastRequestId, uint32(1))
		request.Id = &id
	}
	if r != nil {
		self.requestBufferLock.Lock()
		// this should actually never happen. The sweeper should clear out
		// dead requests before the uint32 ids roll over.
		if oldReq, alreadyHasRequestById := self.requestBuffer[*request.Id]; alreadyHasRequestById {
			message := "already has a request with this id, must have timed out"
			log.Error(message)
			oldReq.r.Yield(&protocol.Response{
				Type:         protocol.Response_ERROR.Enum(),
				ErrorMessage: &message,
			})
		}
		self.requestBuffer[*request.Id] = &runningRequest{timeMade: time.Now(), r: r, request: request}
		self.requestBufferLock.Unlock()
	}

	data, err := request.Encode()
	if err != nil {
		return err
	}

	conn := self.getConnection()
	if conn == nil {
		conn = self.reconnect()
		if conn == nil {
			return fmt.Errorf("Failed to connect to server %s", self.hostAndPort)
		}
	}

	if self.writeTimeout > 0 {
		conn.SetWriteDeadline(time.Now().Add(self.writeTimeout))
	}

	buff := bytes.NewBuffer(make([]byte, 0, len(data)+8))
	binary.Write(buff, binary.LittleEndian, uint32(len(data)))
	buff.Write(data)
	_, err = conn.Write(buff.Bytes())
	if err == nil {
		return nil
	}

	// if we got here it errored out, clear out the request
	self.requestBufferLock.Lock()
	delete(self.requestBuffer, *request.Id)
	self.requestBufferLock.Unlock()
	self.reconnect()
	return err
}
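// The wire framing above is a 4-byte little-endian length prefix followed by
// the encoded protobuf request. A hedged sketch of the matching read side,
// assuming protocol.Request has a Decode counterpart to the Encode used above
// (readNextRequest is a hypothetical name, not part of this codebase):
func readNextRequest(conn net.Conn) (*protocol.Request, error) {
	// read the 4-byte length prefix
	var length uint32
	if err := binary.Read(conn, binary.LittleEndian, &length); err != nil {
		return nil, err
	}
	// read exactly length bytes of encoded request
	data := make([]byte, length)
	if _, err := io.ReadFull(conn, data); err != nil {
		return nil, err
	}
	request := &protocol.Request{}
	if err := request.Decode(data); err != nil {
		return nil, err
	}
	return request, nil
}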
func (self *ShardData) forwardRequest(request *p.Request) ([]<-chan *p.Response, []uint32, error) {
	ids := []uint32{}
	responses := []<-chan *p.Response{}
	for _, server := range self.clusterServers {
		responseChan := make(chan *p.Response, 1)
		// clear the id so that a new one will get assigned for this server
		request.Id = nil
		log.Debug("Forwarding request %s to %d", request.GetDescription(), server.Id)
		server.MakeRequest(request, responseChan)
		responses = append(responses, responseChan)
		ids = append(ids, server.Id)
	}
	return responses, ids, nil
}
func (self *ClusterServer) Write(request *protocol.Request) error {
	responseChan := make(chan *protocol.Response, 1)
	rc := NewResponseChannelWrapper(responseChan)
	err := self.connection.MakeRequest(request, rc)
	if err != nil {
		return err
	}
	log.Debug("Waiting for response to %d", request.GetRequestNumber())
	response := <-responseChan
	if response.ErrorMessage != nil {
		return errors.New(*response.ErrorMessage)
	}
	return nil
}
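// NewResponseChannelWrapper above adapts a plain response channel to the
// cluster.ResponseChannel interface that MakeRequest expects. Based on the
// Yield call seen in MakeRequest, a minimal sketch of such a wrapper might
// look like the following; the method signature is inferred from that call
// site, and the real implementation may differ:
type responseChannelWrapperSketch struct {
	ch chan<- *protocol.Response
}

// Yield hands a response from the client back to whoever holds the other
// end of the channel
func (w *responseChannelWrapperSketch) Yield(r *protocol.Response) {
	w.ch <- r
}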
func (self *ShardData) Write(request *p.Request) error {
	request.ShardId = &self.id
	requestNumber, err := self.wal.AssignSequenceNumbersAndLog(request, self)
	if err != nil {
		return err
	}
	request.RequestNumber = &requestNumber
	if self.store != nil {
		self.store.BufferWrite(request)
	}
	for _, server := range self.clusterServers {
		// we have to create a new request object because the ID gets assigned
		// on each server.
		requestWithoutId := &p.Request{
			Type:          request.Type,
			Database:      request.Database,
			MultiSeries:   request.MultiSeries,
			ShardId:       &self.id,
			RequestNumber: request.RequestNumber,
		}
		server.BufferWrite(requestWithoutId)
	}
	return nil
}
func (self *ProtobufRequestHandler) handleWrites(request *protocol.Request, conn net.Conn) {
	shard := self.clusterConfig.GetLocalShardById(*request.ShardId)
	log.Debug("HANDLE: (%d):%d:%v", self.clusterConfig.LocalServer.Id, request.GetId(), shard)
	err := shard.WriteLocalOnly(request)
	var response *protocol.Response
	if err != nil {
		log.Error("ProtobufRequestHandler: error writing local shard: %s", err)
		response = &protocol.Response{
			RequestId:    request.Id,
			Type:         protocol.Response_ERROR.Enum(),
			ErrorMessage: protocol.String(err.Error()),
		}
	} else {
		response = &protocol.Response{
			RequestId: request.Id,
			Type:      protocol.Response_END_STREAM.Enum(),
		}
	}
	if err := self.WriteResponse(conn, response); err != nil {
		log.Error("ProtobufRequestHandler: error writing response: %s", err)
	}
}
// This method never blocks. It buffers writes until the buffer fills, then
// drops them on the floor and lets the background goroutine replay them from
// the WAL.
func (self *WriteBuffer) Write(request *protocol.Request) {
	self.shardLastRequestNumber[request.GetShardId()] = request.GetRequestNumber()
	select {
	case self.writes <- request:
		log.Debug("Buffering %d:%d for %s", request.GetRequestNumber(), request.GetShardId(), self.writerInfo)
		return
	default:
		select {
		case self.stoppedWrites <- *request.RequestNumber:
			return
		default:
			return
		}
	}
}
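// The nested select above is the standard Go pattern for a non-blocking send
// with a fallback: the outer select hits its default case the moment the
// buffer is full, and the inner select records the dropped request number
// without blocking either. A distilled sketch of the pattern in isolation
// (tryBuffer is a hypothetical name, not part of this codebase):
func tryBuffer(writes chan<- *protocol.Request, stopped chan<- uint32, r *protocol.Request) bool {
	select {
	case writes <- r:
		return true // buffered successfully
	default:
	}
	select {
	case stopped <- r.GetRequestNumber():
	default:
		// a dropped request number is already pending; the replay goroutine
		// will pick this request up from the WAL anyway
	}
	return false // dropped on the floor, to be replayed from the WAL
}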
func (self *ShardData) HandleDestructiveQuery(querySpec *parser.QuerySpec, request *p.Request, response chan<- *p.Response, runLocalOnly bool) {
	if !self.IsLocal && runLocalOnly {
		panic("runLocalOnly is true but this shard is not local")
	}
	responseChannels := []<-chan *p.Response{}
	serverIds := []uint32{}
	if self.IsLocal {
		err := self.deleteDataLocally(querySpec)
		if err != nil {
			msg := err.Error()
			log.Error(msg)
			response <- &p.Response{
				Type:         p.Response_ERROR.Enum(),
				ErrorMessage: &msg,
			}
			return
		}
	}
	log.Debug("request %s, runLocalOnly: %v", request.GetDescription(), runLocalOnly)
	if !runLocalOnly {
		responses, ids, _ := self.forwardRequest(request)
		serverIds = append(serverIds, ids...)
		responseChannels = append(responseChannels, responses...)
	}
	var errorResponse *p.Response
	for idx, channel := range responseChannels {
		serverId := serverIds[idx]
		log.Debug("Waiting for response to %s from %d", request.GetDescription(), serverId)
		for {
			res := <-channel
			log.Debug("Received %s response from %d for %s", res.GetType(), serverId, request.GetDescription())
			if res.GetType() == p.Response_END_STREAM {
				break
			}
			// don't send the error response until the end so the readers
			// don't close out before the other responses.
			// See https://github.com/Wikia/influxdb/issues/316 for more info.
			if res.GetType() != p.Response_ERROR {
				response <- res
			} else if errorResponse == nil {
				errorResponse = res
			}
		}
	}
	if errorResponse != nil {
		response <- errorResponse
		return
	}
	response <- &p.Response{Type: p.Response_END_STREAM.Enum()}
}
func (self *ShardData) SyncWrite(request *p.Request, assignSeqNum bool) error {
	if assignSeqNum {
		self.wal.AssignSequenceNumbers(request)
	}
	request.ShardId = &self.id
	for _, server := range self.clusterServers {
		if err := server.Write(request); err != nil {
			return err
		}
	}
	if self.store == nil {
		return nil
	}
	return self.store.Write(request)
}
func (self *WriteBuffer) write(request *protocol.Request) {
	attempts := 0
	for {
		self.shardIds[*request.ShardId] = true
		err := self.writer.Write(request)
		if err == nil {
			requestNumber := request.RequestNumber
			if requestNumber == nil {
				return
			}
			self.shardCommitedRequestNumber[request.GetShardId()] = *requestNumber
			log.Debug("Committing %d:%d for %s", request.GetRequestNumber(), request.GetShardId(), self.writerInfo)
			self.wal.Commit(*requestNumber, self.serverId)
			return
		}
		if attempts%100 == 0 {
			log.Error("%s: WriteBuffer: error on write to server %d: %s", self.writerInfo, self.serverId, err)
		}
		attempts++
		// backoff happens in the writer, just sleep for a small fixed amount
		// of time before retrying
		time.Sleep(time.Millisecond * 100)
	}
}
func (self *WriteBuffer) replayAndRecover(missedRequest uint32) {
	var req *protocol.Request

	// empty out the buffer before the replay so new writes can buffer while
	// we're replaying. Keep only the first request from the write queue;
	// everything after it will be replayed from the WAL anyway.
	channelLen := len(self.writes)
	for i := 0; i < channelLen; i++ {
		r := <-self.writes
		if req == nil {
			req = r
		}
	}
	if req == nil {
		log.Error("%s: REPLAY: emptied channel, but no request set", self.writerInfo)
		return
	}
	log.Debug("%s: REPLAY: Emptied out channel", self.writerInfo)

	shardIds := make([]uint32, 0)
	for shardId := range self.shardIds {
		shardIds = append(shardIds, shardId)
	}

	// while we're behind keep replaying from the WAL
	for {
		log.Info("%s: REPLAY: Replaying dropped requests...", self.writerInfo)
		log.Debug("%s: REPLAY: from request %d. Shards: %v", self.writerInfo, req.GetRequestNumber(), shardIds)
		self.wal.RecoverServerFromRequestNumber(*req.RequestNumber, shardIds, func(request *protocol.Request, shardId uint32) error {
			log.Debug("%s: REPLAY: writing request number: %d", self.writerInfo, request.GetRequestNumber())
			req = request
			request.ShardId = &shardId
			self.write(request)
			return nil
		})

		log.Info("%s: REPLAY: Emptying out requests from the buffer that we've already replayed", self.writerInfo)
	RequestLoop:
		for {
			select {
			case newReq := <-self.writes:
				if *newReq.RequestNumber == *req.RequestNumber {
					break RequestLoop
				}
			default:
				log.Error("%s: REPLAY: Got to the end of the write buffer without getting to the last written request.", self.writerInfo)
				break RequestLoop
			}
		}
		log.Info("%s: REPLAY: done.", self.writerInfo)

		// now make sure that no new writes were dropped. If so, do the replay
		// again from this point.
		select {
		case <-self.stoppedWrites:
			log.Info("%s: REPLAY: Buffer backed up while replaying, going again.", self.writerInfo)
			continue
		default:
			return
		}
	}
}