// proxyMessage proxies a message, possibly its response, and possibly a
// follow-up call.
func (p *Proxy) proxyMessage(
	h *messageHeader,
	client net.Conn,
	server net.Conn,
	lastError *LastError,
) error {
	deadline := time.Now().Add(p.ReplicaSet.MessageTimeout)
	server.SetDeadline(deadline)
	client.SetDeadline(deadline)

	// OpQuery may need to be transformed and needs special handling in order to
	// make the proxy transparent.
	if h.OpCode == OpQuery {
		stats.BumpSum(p.stats, "message.with.response", 1)
		return p.ReplicaSet.ProxyQuery.Proxy(h, client, server, lastError)
	}

	// Anything besides a getLastError call (which requires an OpQuery) resets
	// the lastError.
	if lastError.Exists() {
		corelog.LogInfoMessage("reset getLastError cache")
		lastError.Reset()
	}

	// For other Ops we proxy the header & raw body over.
	if err := h.WriteTo(server); err != nil {
		corelog.LogError("error", err)
		return err
	}

	if _, err := io.CopyN(server, client, int64(h.MessageLength-headerLen)); err != nil {
		corelog.LogError("error", err)
		return err
	}

	// For Ops with responses we proxy the raw response message over.
	if h.OpCode.HasResponse() {
		stats.BumpSum(p.stats, "message.with.response", 1)
		if err := copyMessage(client, server); err != nil {
			corelog.LogError("error", err)
			return err
		}
	}

	return nil
}
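The header helpers referenced above (readHeader, ToWire, WriteTo, headerLen) are defined elsewhere and not part of this excerpt. A minimal sketch of what they likely look like, assuming the standard 16-byte MongoDB wire protocol header of four little-endian int32s (using encoding/binary):

// Sketch only: assumed definitions of the wire-header helpers used above.
const headerLen = 16

type messageHeader struct {
	MessageLength int32 // total message size, including this header
	RequestID     int32
	ResponseTo    int32
	OpCode        OpCode
}

// readHeader reads the fixed-size header from the stream.
func readHeader(r io.Reader) (*messageHeader, error) {
	var b [headerLen]byte
	if _, err := io.ReadFull(r, b[:]); err != nil {
		return nil, err
	}
	return &messageHeader{
		MessageLength: int32(binary.LittleEndian.Uint32(b[0:4])),
		RequestID:     int32(binary.LittleEndian.Uint32(b[4:8])),
		ResponseTo:    int32(binary.LittleEndian.Uint32(b[8:12])),
		OpCode:        OpCode(binary.LittleEndian.Uint32(b[12:16])),
	}, nil
}

// ToWire encodes the header back into its 16-byte wire form.
func (h *messageHeader) ToWire() []byte {
	b := make([]byte, headerLen)
	binary.LittleEndian.PutUint32(b[0:4], uint32(h.MessageLength))
	binary.LittleEndian.PutUint32(b[4:8], uint32(h.RequestID))
	binary.LittleEndian.PutUint32(b[8:12], uint32(h.ResponseTo))
	binary.LittleEndian.PutUint32(b[12:16], uint32(h.OpCode))
	return b
}

// WriteTo writes the encoded header to w.
func (h *messageHeader) WriteTo(w io.Writer) error {
	_, err := w.Write(h.ToWire())
	return err
}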
func (p *Proxy) clientReadHeader(c net.Conn, timeout time.Duration) (*messageHeader, error) {
	t := stats.BumpTime(p.stats, "client.read.header.time")
	type headerError struct {
		header *messageHeader
		error  error
	}
	resChan := make(chan headerError)

	c.SetReadDeadline(time.Now().Add(timeout))
	go func() {
		h, err := readHeader(c)
		resChan <- headerError{header: h, error: err}
	}()

	closed := false
	var response headerError

	select {
	case response = <-resChan:
		// all good
	case <-p.closed:
		closed = true
		// The proxy is shutting down: force the pending read to time out
		// immediately so the reader goroutine above doesn't leak.
		c.SetReadDeadline(timeInPast)
		response = <-resChan
	}

	// Successfully read a header.
	if response.error == nil {
		t.End()
		return response.header, nil
	}

	// Client side disconnected.
	if response.error == io.EOF {
		stats.BumpSum(p.stats, "client.clean.disconnect", 1)
		return nil, errNormalClose
	}

	// We hit our ReadDeadline.
	if ne, ok := response.error.(net.Error); ok && ne.Timeout() {
		if closed {
			stats.BumpSum(p.stats, "client.clean.disconnect", 1)
			return nil, errNormalClose
		}
		return nil, errClientReadTimeout
	}

	// Some other unknown error.
	stats.BumpSum(p.stats, "client.error.disconnect", 1)
	corelog.LogError("error", response.error)
	return nil, response.error
}
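The timeInPast sentinel isn't shown in this excerpt. The trick is that setting a read deadline that has already elapsed makes the blocked Read inside readHeader return a timeout error right away, which is what lets the select above drain the helper goroutine during shutdown. A plausible definition, assuming any fixed past timestamp works:

// Hypothetical definition of the timeInPast sentinel: any time in the past
// is sufficient, since it only needs to make pending reads fail immediately.
var timeInPast = time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC)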
// ReadOne reads a single-document response from the server, unmarshals it
// into v, and returns the various parts.
func (r *ReplyRW) ReadOne(server io.Reader, v interface{}) (*messageHeader, replyPrefix, int32, error) {
	h, err := readHeader(server)
	if err != nil {
		corelog.LogError("error", err)
		return nil, emptyPrefix, 0, err
	}

	if h.OpCode != OpReply {
		err := fmt.Errorf("readOneReplyDoc: expected op %s, got %s", OpReply, h.OpCode)
		return nil, emptyPrefix, 0, err
	}

	var prefix replyPrefix
	if _, err := io.ReadFull(server, prefix[:]); err != nil {
		corelog.LogError("error", err)
		return nil, emptyPrefix, 0, err
	}

	numDocs := getInt32(prefix[:], 16)
	if numDocs != 1 {
		err := fmt.Errorf("readOneReplyDoc: can only handle 1 result document, got: %d", numDocs)
		return nil, emptyPrefix, 0, err
	}

	rawDoc, err := readDocument(server)
	if err != nil {
		corelog.LogError("error", err)
		return nil, emptyPrefix, 0, err
	}

	if err := bson.Unmarshal(rawDoc, v); err != nil {
		corelog.LogError("error", err)
		return nil, emptyPrefix, 0, err
	}

	return h, prefix, int32(len(rawDoc)), nil
}
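The replyPrefix type and getInt32 helper aren't shown here. In the OP_REPLY wire layout, the 20 bytes after the header are responseFlags (int32), cursorID (int64), startingFrom (int32), and numberReturned (int32), which is why ReadOne looks at byte offset 16. A sketch of what the helpers likely look like, under that assumption:

// Sketch only: assumed reply-prefix helpers used by ReadOne.
type replyPrefix [20]byte

var emptyPrefix replyPrefix

// getInt32 decodes a little-endian int32 starting at offset.
func getInt32(b []byte, offset int) int32 {
	return int32(binary.LittleEndian.Uint32(b[offset : offset+4]))
}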
// clientAcceptLoop accepts new clients and creates a clientServeLoop for each
// new client that connects to the proxy.
func (p *Proxy) clientAcceptLoop() {
	for {
		p.wg.Add(1)
		c, err := p.ClientListener.Accept()
		if err != nil {
			p.wg.Done()
			if strings.Contains(err.Error(), "use of closed network connection") {
				break
			}
			corelog.LogError("error", err)
			continue
		}
		go p.clientServeLoop(c)
	}
}
// Open up a new connection to the server. Retry 7 times, doubling the sleep
// each time. This means we'll sleep a total of 6.35 seconds, with the last
// wait being 3.2 seconds.
func (p *Proxy) newServerConn() (io.Closer, error) {
	retrySleep := 50 * time.Millisecond
	for retryCount := 7; retryCount > 0; retryCount-- {
		c, err := net.DialTimeout("tcp", p.MongoAddr, time.Second)
		if err == nil {
			if len(p.Username) == 0 {
				return c, nil
			}
			err = p.AuthConn(c)
			if err == nil {
				return c, nil
			}
		}
		corelog.LogError("error", err)

		time.Sleep(retrySleep)
		retrySleep = retrySleep * 2
	}
	return nil, fmt.Errorf("could not connect to %s", p.MongoAddr)
}
// Proxy proxies an OpQuery and a corresponding response.
func (p *ProxyQuery) Proxy(
	h *messageHeader,
	client io.ReadWriter,
	server io.ReadWriter,
	lastError *LastError,
) error {
	// https://github.com/mongodb/mongo/search?q=lastError.disableForCommand
	// Shows the logic we need to be in sync with. Unfortunately it isn't a
	// simple check to determine this, and may change underneath us at the mongo
	// layer.
	resetLastError := true

	parts := [][]byte{h.ToWire()}

	var flags [4]byte
	if _, err := io.ReadFull(client, flags[:]); err != nil {
		corelog.LogError("error", err)
		return err
	}
	parts = append(parts, flags[:])

	fullCollectionName, err := readCString(client)
	if err != nil {
		corelog.LogError("error", err)
		return err
	}
	parts = append(parts, fullCollectionName)

	var rewriter responseRewriter
	if *proxyAllQueries || bytes.HasSuffix(fullCollectionName, cmdCollectionSuffix) {
		var twoInt32 [8]byte
		if _, err := io.ReadFull(client, twoInt32[:]); err != nil {
			corelog.LogError("error", err)
			return err
		}
		parts = append(parts, twoInt32[:])

		queryDoc, err := readDocument(client)
		if err != nil {
			corelog.LogError("error", err)
			return err
		}
		parts = append(parts, queryDoc)

		var q bson.D
		if err := bson.Unmarshal(queryDoc, &q); err != nil {
			corelog.LogError("error", err)
			return err
		}

		if hasKey(q, "getLastError") {
			return p.GetLastErrorRewriter.Rewrite(
				h,
				parts,
				client,
				server,
				lastError,
			)
		}

		if hasKey(q, "isMaster") {
			rewriter = p.IsMasterResponseRewriter
		}
		if bytes.Equal(adminCollectionName, fullCollectionName) && hasKey(q, "replSetGetStatus") {
			rewriter = p.ReplSetGetStatusResponseRewriter
		}

		if rewriter != nil {
			// If forShell is specified, we don't want to reset the last error. See
			// comment above around resetLastError for details.
			resetLastError = !hasKey(q, "forShell")
		}
	}

	if resetLastError && lastError.Exists() {
		corelog.LogInfoMessage("reset getLastError cache")
		lastError.Reset()
	}

	var written int
	for _, b := range parts {
		n, err := server.Write(b)
		if err != nil {
			corelog.LogError("error", err)
			return err
		}
		written += n
	}

	pending := int64(h.MessageLength) - int64(written)
	if _, err := io.CopyN(server, client, pending); err != nil {
		corelog.LogError("error", err)
		return err
	}

	if rewriter != nil {
		if err := rewriter.Rewrite(client, server); err != nil {
			return err
		}
		return nil
	}

	if err := copyMessage(client, server); err != nil {
		corelog.LogError("error", err)
		return err
	}

	return nil
}
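The hasKey helper isn't shown above. Since the query document is unmarshalled into a bson.D (mgo's ordered slice of Name/Value pairs), a minimal sketch of it could look like this; the name comes from the code above, the body is an assumption:

// Sketch only: reports whether the ordered document d contains key.
func hasKey(d bson.D, key string) bool {
	for _, elem := range d {
		if elem.Name == key {
			return true
		}
	}
	return false
}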
// Rewrite handles getLastError requests.
func (r *GetLastErrorRewriter) Rewrite(
	h *messageHeader,
	parts [][]byte,
	client io.ReadWriter,
	server io.ReadWriter,
	lastError *LastError,
) error {
	if !lastError.Exists() {
		// We're going to be performing a real getLastError query and caching the
		// response.
		var written int
		for _, b := range parts {
			n, err := server.Write(b)
			if err != nil {
				corelog.LogError("error", err)
				return err
			}
			written += n
		}

		pending := int64(h.MessageLength) - int64(written)
		if _, err := io.CopyN(server, client, pending); err != nil {
			corelog.LogError("error", err)
			return err
		}

		var err error
		if lastError.header, err = readHeader(server); err != nil {
			corelog.LogError("error", err)
			return err
		}

		pending = int64(lastError.header.MessageLength - headerLen)
		if _, err = io.CopyN(&lastError.rest, server, pending); err != nil {
			corelog.LogError("error", err)
			return err
		}
		corelog.LogInfoMessage(fmt.Sprintf("caching new getLastError response: %s", lastError.rest.Bytes()))
	} else {
		// We need to discard the pending bytes from the client from the query
		// before we send it our cached response.
		var written int
		for _, b := range parts {
			written += len(b)
		}
		pending := int64(h.MessageLength) - int64(written)
		if _, err := io.CopyN(ioutil.Discard, client, pending); err != nil {
			corelog.LogError("error", err)
			return err
		}

		// Modify and send the cached response for this request.
		lastError.header.ResponseTo = h.RequestID
		corelog.LogInfoMessage(fmt.Sprintf("using cached getLastError response: %s", lastError.rest.Bytes()))
	}

	if err := lastError.header.WriteTo(client); err != nil {
		corelog.LogError("error", err)
		return err
	}

	if _, err := client.Write(lastError.rest.Bytes()); err != nil {
		corelog.LogError("error", err)
		return err
	}

	return nil
}
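The LastError cache itself isn't part of this excerpt. From how Rewrite uses it (a wire header plus a buffered body, with Exists and Reset), a plausible sketch follows; the field and method names are taken from the code above, but the bodies are assumptions:

// Sketch only: cache of the last getLastError response seen from the server.
type LastError struct {
	header *messageHeader
	rest   bytes.Buffer
}

// Exists reports whether a response has been cached.
func (l *LastError) Exists() bool {
	return l.header != nil
}

// Reset drops the cached response.
func (l *LastError) Reset() {
	l.header = nil
	l.rest.Reset()
}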
// clientServeLoop loops on a single client connected to the proxy and
// dispatches its requests.
func (p *Proxy) clientServeLoop(c net.Conn) {
	remoteIP := c.RemoteAddr().(*net.TCPAddr).IP.String()

	// enforce per-client max connection limit
	if p.maxPerClientConnections.inc(remoteIP) {
		c.Close()
		stats.BumpSum(p.stats, "client.rejected.max.connections", 1)
		corelog.LogErrorMessage(fmt.Sprintf("rejecting client connection due to max connections limit: %s", remoteIP))
		return
	}

	// turn on TCP keep-alive and set it to the recommended period of 2 minutes
	// http://docs.mongodb.org/manual/faq/diagnostics/#faq-keepalive
	if conn, ok := c.(*net.TCPConn); ok {
		conn.SetKeepAlivePeriod(2 * time.Minute)
		conn.SetKeepAlive(true)
	}

	c = teeIf(fmt.Sprintf("client %s <=> %s", c.RemoteAddr(), p), c)
	stats.BumpSum(p.stats, "client.connected", 1)
	defer func() {
		p.wg.Done()
		if err := c.Close(); err != nil {
			corelog.LogError("error", err)
		}
		p.maxPerClientConnections.dec(remoteIP)
	}()

	var lastError LastError
	for {
		h, err := p.idleClientReadHeader(c)
		if err != nil {
			if err != errNormalClose {
				corelog.LogError("error", err)
			}
			return
		}

		mpt := stats.BumpTime(p.stats, "message.proxy.time")
		serverConn, err := p.getServerConn()
		if err != nil {
			if err != errNormalClose {
				corelog.LogError("error", err)
			}
			return
		}

		scht := stats.BumpTime(p.stats, "server.conn.held.time")
		for {
			err := p.proxyMessage(h, c, serverConn, &lastError)
			if err != nil {
				p.serverPool.Discard(serverConn)
				corelog.LogErrorMessage(fmt.Sprintf("proxy message failed: %s", err))
				stats.BumpSum(p.stats, "message.proxy.error", 1)
				if ne, ok := err.(net.Error); ok && ne.Timeout() {
					stats.BumpSum(p.stats, "message.proxy.timeout", 1)
				}
				return
			}

			// One message was proxied, stop its timer.
			mpt.End()

			if !h.OpCode.IsMutation() {
				break
			}

			// If the operation we just performed was a mutation, we always make the
			// follow-up request on the same server because it's possibly a
			// getLastError call which expects this behavior.
			stats.BumpSum(p.stats, "message.with.mutation", 1)
			h, err = p.gleClientReadHeader(c)
			if err != nil {
				// Client did not make _any_ query within the GetLastErrorTimeout.
				// Break out so the server is returned to the pool and we go back to
				// the outer loop.
				if err == errClientReadTimeout {
					break
				}
				// Prevent noise of normal client disconnects, but log if anything else.
				if err != errNormalClose {
					corelog.LogError("error", err)
				}
				// We need to return our server to the pool (it's still good as far
				// as we know).
				p.serverPool.Release(serverConn)
				return
			}

			// Successfully read message when waiting for the getLastError call.
			mpt = stats.BumpTime(p.stats, "message.proxy.time")
		}
		p.serverPool.Release(serverConn)
		scht.End()
		stats.BumpSum(p.stats, "message.proxy.success", 1)
	}
}
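The per-client connection limiter isn't shown in this excerpt. Based on the inc/dec calls above (where inc returns true when the client is already at its limit), a minimal sketch, assuming a mutex-guarded map keyed by client IP; the type name and bodies here are assumptions:

// Sketch only: per-client-IP connection counter used by clientServeLoop.
type maxPerClientConnections struct {
	max    uint
	counts map[string]uint
	mutex  sync.Mutex
}

func newMaxPerClientConnections(max uint) *maxPerClientConnections {
	return &maxPerClientConnections{max: max, counts: make(map[string]uint)}
}

// inc records a new connection for ip and reports whether the client was
// already at its limit (in which case the count is left unchanged).
func (m *maxPerClientConnections) inc(ip string) bool {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	if m.counts[ip] >= m.max {
		return true
	}
	m.counts[ip]++
	return false
}

// dec removes a connection for ip.
func (m *maxPerClientConnections) dec(ip string) {
	m.mutex.Lock()
	defer m.mutex.Unlock()
	m.counts[ip]--
}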
func (p *Proxy) serverCloseErrorHandler(err error) {
	corelog.LogError("error", err)
}