func (manager *StateManager) removeProxy(proxy *Proxy) { if _, ok := manager.proxyToReal[proxy.ProxyAddr]; !ok { corelog.LogErrorMessage(fmt.Sprintf("proxy %s does not exist in ReplicaSet", proxy.ProxyAddr)) } if _, ok := manager.realToProxy[proxy.MongoAddr]; !ok { corelog.LogErrorMessage(fmt.Sprintf("mongo %s does not exist in ReplicaSet", proxy.ProxyAddr)) } corelog.LogInfoMessage(fmt.Sprintf("removed %s", proxy)) delete(manager.proxyToReal, proxy.ProxyAddr) delete(manager.realToProxy, proxy.MongoAddr) delete(manager.proxies, proxy.ProxyAddr) }
// Attemps to connect to Mongo through Dvara, with timeout. func (r *ReplicaSet) Check(timeout time.Duration) error { errChan := make(chan error) go r.runCheck(errChan) // blocking wait select { case err := <-errChan: if err != nil { r.Stats.BumpSum("healthcheck.failed", 1) corelog.LogErrorMessage(fmt.Sprintf("Failed healthcheck due to %s", err)) } else { r.Stats.BumpSum("healthcheck.connected", 1) } return err case <-time.After(timeout): r.Stats.BumpSum("healthcheck.failed", 1) corelog.LogErrorMessage(fmt.Sprintf("Failed healthcheck due to timeout %s", timeout)) return errors.New("Failed due to timeout") } }
// Synchronize fetches the latest state for the replica set and reconciles the
// manager's internal state with it: proxies are added/removed/restarted as
// needed and the seed address list is extended with any newly discovered
// nodes. Errors during state generation or comparison are counted in stats,
// logged, and cause an early return without mutating manager state.
//
// Locking protocol: reads (state generation, comparison) happen under RLock;
// the read lock is released before taking the write lock for mutation, so
// another goroutine may interleave between the two phases.
func (manager *StateManager) Synchronize() {
	// Total wall-clock time for a synchronize pass, including lock waits.
	defer manager.replicaSet.Stats.BumpTime("replica.manager.time").End()
	// Age of the state we are about to replace, in nanoseconds.
	manager.replicaSet.Stats.BumpHistogram("replica.manager.rs_state_age", float64(time.Since(manager.refreshTime).Nanoseconds()))
	manager.RLock()
	newState, err := manager.generateReplicaSetState()
	if err != nil {
		manager.replicaSet.Stats.BumpSum("replica.manager.failed_state_check", 1)
		corelog.LogErrorMessage(fmt.Sprintf("all nodes possibly down?: %s", err))
		// NOTE: early returns must release the read lock manually because the
		// lock/unlock pairs here are not defer-based.
		manager.RUnlock()
		return
	}
	comparison, err := manager.getComparison(manager.currentReplicaSetState.lastRS, newState.lastRS)
	if err != nil {
		manager.replicaSet.Stats.BumpSum("replica.manager.failed_comparison", 1)
		corelog.LogErrorMessage(fmt.Sprintf("Manager failed comparison %s", err))
		manager.RUnlock()
		return
	}
	manager.RUnlock() // all reads done
	// Time spent holding the exclusive lock (mutation phase only).
	defer manager.replicaSet.Stats.BumpTime("replica.manager.time.locked").End()
	manager.Lock()
	defer manager.Unlock()
	if err = manager.addRemoveProxies(comparison); err != nil {
		manager.replicaSet.Stats.BumpSum("replica.manager.failed_proxy_update", 1)
		corelog.LogErrorMessage(fmt.Sprintf("Manager failed proxy update %s", err))
		return
	}
	manager.stopStartProxies(comparison)
	manager.currentReplicaSetState = newState
	// Add discovered nodes to seed address list. Over time if the original seed
	// nodes have gone away and new nodes have joined this ensures that we'll
	// still be able to connect.
	rawAddrs := strings.Split(manager.baseAddrs, ",")
	manager.baseAddrs = strings.Join(uniq(append(rawAddrs, manager.currentReplicaSetState.Addrs()...)), ",")
	manager.refreshTime = time.Now()
}
// FromAddrs creates a ReplicaSetState from the given set of see addresses. It // requires the addresses to be part of the same Replica Set. func (c *ReplicaSetStateCreator) FromAddrs(username, password string, addrs []string, replicaSetName string) (*ReplicaSetState, error) { var r *ReplicaSetState for _, addr := range addrs { ar, err := NewReplicaSetState(username, password, addr) if err != nil { if err != errNoReachableServers { corelog.LogErrorMessage(fmt.Sprintf("ignoring failure against address %s: %s", addr, err)) } continue } if replicaSetName != "" { if ar.lastRS == nil { corelog.LogErrorMessage(fmt.Sprintf("ignoring standalone node %q not in expected replset: %q", addr, replicaSetName)) continue } if ar.lastRS.Name != replicaSetName { corelog.LogErrorMessage(fmt.Sprintf("ignoring node %q not in expected replset: %q vs %q", addr, ar.lastRS.Name, replicaSetName)) continue } } // First successful address. if r == nil { r = ar continue } // Ensure same as already established ReplicaSetState. if err := r.AssertEqual(ar); err != nil { return nil, err } } if r == nil { return nil, fmt.Errorf("could not connect to any provided addresses: %v", addrs) } return r, nil }
// ServeHTTP makes ContextHandler satisfy the http.Handler interface.
// It derives a cancellable context from the request, tags it with a fresh
// request ID, records timing metrics, and recovers from handler panics by
// reporting them and writing a 500 JSON response.
//
// NOTE(review): this method writes per-request state (Context, Cancel,
// RequestID, Logger) onto the shared *ContextHandler receiver; if the same
// handler instance serves concurrent requests this looks like a data race —
// confirm how the handler is registered.
func (ch *ContextHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	ch.Context, ch.Cancel = context.WithCancel(r.Context())
	ch.RequestID = uuid.New()
	// NOTE(review): plain string context key — go vet recommends an unexported
	// key type to avoid collisions; changing it would touch all readers.
	ch.Context = context.WithValue(ch.Context, "requestID", ch.RequestID)
	ch.Logger = ch.Logger.SetStandardFields("requestID", ch.RequestID)
	// measure timing info
	defer metrics.MeasureSince(fmt.Sprintf("api.%s_%s", r.URL.String(), r.Method), time.Now())
	defer func(ctx context.Context) {
		// Panic recovery: log, count, report to the monitor, and answer 500.
		if rcv := recover(); rcv != nil {
			log.LogErrorMessage("Request Panicked", "status", 500, "requestID", ctx.Value("requestID"), "error", rcv)
			ch.Metrics.IncrementCount(fmt.Sprintf("api.%s_%s.error", r.URL.String(), r.Method))
			err := errors.New(fmt.Sprint(rcv))
			ch.Monitor.CaptureExceptionWithTags(err, "requestID", ctx.Value("requestID"), "endpoint", r.URL.String())
			JSONErrorResponse(500, err).WriteTo(w)
		}
		ch.Cancel() // cancel on error
	}(ch.Context)
	ch.handlerFunc(ch, &StatusWrappingResponseWriter{w, 0}, r)
	// Normal-path cancel; the deferred Cancel above also fires (Cancel is
	// idempotent for contexts, so the double call is harmless).
	ch.Cancel()
}
// stopProxy stops the given proxy (hard stop), logging any error. The error
// is not propagated; a failed stop is recorded and execution continues.
func (manager *StateManager) stopProxy(proxy *Proxy) {
	if err := proxy.stop(true); err != nil {
		// Fix: include the actual error, which was previously dropped.
		corelog.LogErrorMessage(fmt.Sprintf("Failed to stop proxy %s: %s", proxy, err))
	}
}
// startProxy starts the given proxy, logging any error. The error is not
// propagated; a failed start is recorded and execution continues.
func (manager *StateManager) startProxy(proxy *Proxy) {
	if err := proxy.Start(); err != nil {
		// Fix: include the actual error, which was previously dropped.
		corelog.LogErrorMessage(fmt.Sprintf("Failed to start proxy %s: %s", proxy, err))
	}
}
// clientServeLoop loops on a single client connected to the proxy and
// dispatches its requests. It enforces a per-client connection limit,
// configures TCP keep-alive, and for each client message checks out a server
// connection from the pool, proxies the message, and returns the connection
// via Release (healthy) or Discard (after a proxy error).
func (p *Proxy) clientServeLoop(c net.Conn) {
	remoteIP := c.RemoteAddr().(*net.TCPAddr).IP.String()

	// enforce per-client max connection limit
	if p.maxPerClientConnections.inc(remoteIP) {
		c.Close()
		stats.BumpSum(p.stats, "client.rejected.max.connections", 1)
		corelog.LogErrorMessage(fmt.Sprintf("rejecting client connection due to max connections limit: %s", remoteIP))
		return
	}

	// turn on TCP keep-alive and set it to the recommended period of 2 minutes
	// http://docs.mongodb.org/manual/faq/diagnostics/#faq-keepalive
	if conn, ok := c.(*net.TCPConn); ok {
		conn.SetKeepAlivePeriod(2 * time.Minute)
		conn.SetKeepAlive(true)
	}

	// Optionally wrap the connection for traffic teeing/debugging.
	c = teeIf(fmt.Sprintf("client %s <=> %s", c.RemoteAddr(), p), c)
	stats.BumpSum(p.stats, "client.connected", 1)
	// Cleanup on every exit path: signal the proxy's waitgroup, close the
	// client connection, and release this client's per-IP connection slot.
	// NOTE(review): p.wg.Done() implies a matching Add(1) in the caller —
	// confirm at the accept site.
	defer func() {
		p.wg.Done()
		if err := c.Close(); err != nil {
			corelog.LogError("error", err)
		}
		p.maxPerClientConnections.dec(remoteIP)
	}()

	var lastError LastError
	// Outer loop: one iteration per client request (plus any follow-up
	// getLastError exchange handled by the inner loop).
	for {
		h, err := p.idleClientReadHeader(c)
		if err != nil {
			// errNormalClose is an expected disconnect; anything else is logged.
			if err != errNormalClose {
				corelog.LogError("error", err)
			}
			return
		}
		mpt := stats.BumpTime(p.stats, "message.proxy.time")
		serverConn, err := p.getServerConn()
		if err != nil {
			if err != errNormalClose {
				corelog.LogError("error", err)
			}
			return
		}
		scht := stats.BumpTime(p.stats, "server.conn.held.time")
		// Inner loop: proxy the message, and keep the same server connection
		// for any follow-up requests after a mutation.
		for {
			err := p.proxyMessage(h, c, serverConn, &lastError)
			if err != nil {
				// The server connection is in an unknown state — discard it
				// rather than returning it to the pool.
				p.serverPool.Discard(serverConn)
				corelog.LogErrorMessage(fmt.Sprintf("Proxy message failed %s ", err))
				stats.BumpSum(p.stats, "message.proxy.error", 1)
				if ne, ok := err.(net.Error); ok && ne.Timeout() {
					stats.BumpSum(p.stats, "message.proxy.timeout", 1)
				}
				return
			}
			// One message was proxied, stop its timer.
			mpt.End()
			if !h.OpCode.IsMutation() {
				break
			}
			// If the operation we just performed was a mutation, we always make the
			// follow up request on the same server because it's possibly a getLastErr
			// call which expects this behavior.
			stats.BumpSum(p.stats, "message.with.mutation", 1)
			h, err = p.gleClientReadHeader(c)
			if err != nil {
				// Client did not make _any_ query within the GetLastErrorTimeout.
				// Return the server to the pool and go back to the outer loop.
				if err == errClientReadTimeout {
					break
				}
				// Prevent noise of normal client disconnects, but log if anything else.
				if err != errNormalClose {
					corelog.LogError("error", err)
				}
				// We need to return our server to the pool (it's still good as far
				// as we know).
				p.serverPool.Release(serverConn)
				return
			}
			// Successfully read message when waiting for the getLastError call.
			mpt = stats.BumpTime(p.stats, "message.proxy.time")
		}
		// Healthy exit from the inner loop: return the connection and record
		// how long it was held.
		p.serverPool.Release(serverConn)
		scht.End()
		stats.BumpSum(p.stats, "message.proxy.success", 1)
	}
}
func (r *ReplicaSet) HandleFailure() { corelog.LogErrorMessage("Crashing dvara due to consecutive failed healthchecks") r.Stats.BumpSum("healthcheck.failed.panic", 1) panic("failed healthchecks") }
// Errorf formats the message per fmt.Sprintf rules and emits it through
// corelog as an error.
func (l *Logger) Errorf(f string, args ...interface{}) {
	msg := fmt.Sprintf(f, args...)
	corelog.LogErrorMessage(msg)
}