Beispiel #1
0
// removeProxy deletes proxy from manager.proxyToReal (keyed by
// proxy.ProxyAddr), manager.realToProxy (keyed by proxy.MongoAddr) and
// manager.proxies (keyed by proxy.ProxyAddr).
//
// A missing entry in either address map is logged as an error but does not
// stop the removal; deleting an absent key is a no-op.
func (manager *StateManager) removeProxy(proxy *Proxy) {
	if _, ok := manager.proxyToReal[proxy.ProxyAddr]; !ok {
		corelog.LogErrorMessage(fmt.Sprintf("proxy %s does not exist in ReplicaSet", proxy.ProxyAddr))
	}
	if _, ok := manager.realToProxy[proxy.MongoAddr]; !ok {
		// Report the key that was actually looked up (the mongo address),
		// not the proxy address.
		corelog.LogErrorMessage(fmt.Sprintf("mongo %s does not exist in ReplicaSet", proxy.MongoAddr))
	}
	corelog.LogInfoMessage(fmt.Sprintf("removed %s", proxy))
	delete(manager.proxyToReal, proxy.ProxyAddr)
	delete(manager.realToProxy, proxy.MongoAddr)
	delete(manager.proxies, proxy.ProxyAddr)
}
Beispiel #2
0
// Check attempts to connect to Mongo through Dvara, giving up after timeout.
//
// The actual check is delegated to r.runCheck, which runs in its own
// goroutine and reports its outcome on a channel. Check returns that outcome
// (nil on success) or a "Failed due to timeout" error if nothing arrives
// within timeout. Every outcome bumps a healthcheck.* counter.
func (r *ReplicaSet) Check(timeout time.Duration) error {
	// Buffered with capacity 1 so the send in runCheck still succeeds after
	// we have given up waiting. With an unbuffered channel the goroutine
	// would block on its send forever once the timeout branch is taken.
	// NOTE(review): assumes runCheck sends at most one value — confirm.
	errChan := make(chan error, 1)
	go r.runCheck(errChan)

	// A stoppable timer releases its resources as soon as the check returns,
	// rather than lingering until it fires.
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	// blocking wait
	select {
	case err := <-errChan:
		if err != nil {
			r.Stats.BumpSum("healthcheck.failed", 1)
			corelog.LogErrorMessage(fmt.Sprintf("Failed healthcheck due to %s", err))
		} else {
			r.Stats.BumpSum("healthcheck.connected", 1)
		}
		return err
	case <-timer.C:
		r.Stats.BumpSum("healthcheck.failed", 1)
		corelog.LogErrorMessage(fmt.Sprintf("Failed healthcheck due to timeout %s", timeout))
		return errors.New("Failed due to timeout")
	}
}
Beispiel #3
0
// Synchronize gets the new state for a replica set and synchronizes the
// manager's internal state with it.
//
// The work is split into two phases:
//
//  1. Under the read lock, a fresh state is generated and compared against
//     manager.currentReplicaSetState. If either step fails, a failure counter
//     is bumped, the error is logged, and the method returns before the write
//     lock is taken.
//  2. Under the write lock, the comparison is applied (addRemoveProxies, then
//     stopStartProxies), the new state replaces the current one, the seed
//     address list is extended, and the refresh time is updated.
//
// Errors are logged and counted; nothing is returned to the caller.
func (manager *StateManager) Synchronize() {
	// BumpTime is evaluated now; only End() is deferred, so this timer spans
	// the entire call.
	defer manager.replicaSet.Stats.BumpTime("replica.manager.time").End()
	// Record how long ago the state was last refreshed, in nanoseconds.
	// NOTE(review): refreshTime is read here without holding the lock, while
	// it is written under the write lock below — confirm this is safe for
	// how Synchronize is scheduled.
	manager.replicaSet.Stats.BumpHistogram("replica.manager.rs_state_age", float64(time.Since(manager.refreshTime).Nanoseconds()))

	// Phase 1: read-only work under the read lock. Each early return below
	// releases the read lock explicitly.
	manager.RLock()
	newState, err := manager.generateReplicaSetState()
	if err != nil {
		manager.replicaSet.Stats.BumpSum("replica.manager.failed_state_check", 1)
		corelog.LogErrorMessage(fmt.Sprintf("all nodes possibly down?: %s", err))
		manager.RUnlock()
		return
	}

	comparison, err := manager.getComparison(manager.currentReplicaSetState.lastRS, newState.lastRS)
	if err != nil {
		manager.replicaSet.Stats.BumpSum("replica.manager.failed_comparison", 1)
		corelog.LogErrorMessage(fmt.Sprintf("Manager failed comparison %s", err))
		manager.RUnlock()
		return
	}
	manager.RUnlock() // all reads done

	// This timer is created before Lock() is called, so it also covers the
	// time spent waiting to acquire the write lock.
	defer manager.replicaSet.Stats.BumpTime("replica.manager.time.locked").End()

	// Phase 2: apply the changes under the write lock.
	// NOTE(review): the read lock is released before the write lock is taken,
	// so the comparison could be stale if another writer runs in between —
	// confirm that callers serialize Synchronize.
	manager.Lock()
	defer manager.Unlock()
	if err = manager.addRemoveProxies(comparison); err != nil {
		manager.replicaSet.Stats.BumpSum("replica.manager.failed_proxy_update", 1)
		corelog.LogErrorMessage(fmt.Sprintf("Manager failed proxy update %s", err))
		return
	}

	manager.stopStartProxies(comparison)
	manager.currentReplicaSetState = newState

	// Add discovered nodes to seed address list. Over time if the original seed
	// nodes have gone away and new nodes have joined this ensures that we'll
	// still be able to connect.
	rawAddrs := strings.Split(manager.baseAddrs, ",")
	manager.baseAddrs = strings.Join(uniq(append(rawAddrs, manager.currentReplicaSetState.Addrs()...)), ",")
	manager.refreshTime = time.Now()
}
Beispiel #4
0
// FromAddrs creates a ReplicaSetState from the given set of seed addresses.
// It requires the addresses to be part of the same Replica Set.
//
// Each address is probed in order with NewReplicaSetState. Addresses that
// fail are skipped; the failure is logged unless it is errNoReachableServers.
// When replicaSetName is non-empty, an address is also skipped (and logged)
// if its state has no replica set info or reports a different name. The
// first accepted state is the reference, and every later accepted state must
// pass AssertEqual against it; a mismatch is returned immediately. An error
// is also returned when no address yields an accepted state.
func (c *ReplicaSetStateCreator) FromAddrs(username, password string, addrs []string, replicaSetName string) (*ReplicaSetState, error) {
	var established *ReplicaSetState

	for _, addr := range addrs {
		candidate, err := NewReplicaSetState(username, password, addr)
		if err != nil {
			if err != errNoReachableServers {
				corelog.LogErrorMessage(fmt.Sprintf("ignoring failure against address %s: %s", addr, err))
			}
			continue
		}

		// Only filter by replica set name when one was requested.
		if replicaSetName != "" {
			switch {
			case candidate.lastRS == nil:
				corelog.LogErrorMessage(fmt.Sprintf("ignoring standalone node %q not in expected replset: %q", addr, replicaSetName))
				continue
			case candidate.lastRS.Name != replicaSetName:
				corelog.LogErrorMessage(fmt.Sprintf("ignoring node %q not in expected replset: %q vs %q", addr, candidate.lastRS.Name, replicaSetName))
				continue
			}
		}

		// Later candidates must agree with the reference state.
		if established != nil {
			if err := established.AssertEqual(candidate); err != nil {
				return nil, err
			}
			continue
		}

		// First accepted address becomes the reference.
		established = candidate
	}

	if established == nil {
		return nil, fmt.Errorf("could not connect to any provided addresses: %v", addrs)
	}
	return established, nil
}
Beispiel #5
0
// ServeHTTP makes ContextHandler satisfy the http.Handler interface.
//
// For each request it:
//   - derives a cancellable context from the request context and stores a
//     fresh request ID in it under the "requestID" key;
//   - publishes the context, cancel func, request ID and a request-scoped
//     logger on the receiver so the wrapped handler func can use them;
//   - records timing for the request;
//   - invokes ch.handlerFunc with a StatusWrappingResponseWriter around w;
//   - recovers from a panic in the handler, reporting it and writing a 500
//     JSON error response;
//   - cancels the request context on every exit path.
//
// NOTE(review): per-request state is stored on the receiver. If a single
// *ContextHandler serves concurrent requests, these field writes race with
// each other — confirm whether a handler instance is shared across requests.
func (ch *ContextHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// Keep this request's context and cancel func in locals. Cleanup must not
	// go through ch.Cancel, which may have been overwritten by the time the
	// deferred function runs; that would leak this request's context and
	// cancel someone else's.
	ctx, cancel := context.WithCancel(r.Context())
	ch.RequestID = uuid.New()
	ctx = context.WithValue(ctx, "requestID", ch.RequestID)
	ch.Context, ch.Cancel = ctx, cancel
	ch.Logger = ch.Logger.SetStandardFields("requestID", ch.RequestID)

	// measure timing info (time.Now() is evaluated here, at defer time)
	defer metrics.MeasureSince(fmt.Sprintf("api.%s_%s", r.URL.String(), r.Method), time.Now())

	defer func() {
		// Always release the context, even if the recovery path itself fails.
		defer cancel()

		// Panic recovery
		if rcv := recover(); rcv != nil {
			log.LogErrorMessage("Request Panicked", "status", 500, "requestID", ctx.Value("requestID"), "error", rcv)
			ch.Metrics.IncrementCount(fmt.Sprintf("api.%s_%s.error", r.URL.String(), r.Method))
			err := errors.New(fmt.Sprint(rcv))
			ch.Monitor.CaptureExceptionWithTags(err, "requestID", ctx.Value("requestID"), "endpoint", r.URL.String())
			JSONErrorResponse(500, err).WriteTo(w)
		}
	}()

	ch.handlerFunc(ch, &StatusWrappingResponseWriter{w, 0}, r)
}
Beispiel #6
0
// stopProxy stops proxy by calling proxy.stop(true). A failure is logged
// together with its cause and is not propagated to the caller.
func (manager *StateManager) stopProxy(proxy *Proxy) {
	if err := proxy.stop(true); err != nil {
		// Include the underlying error so the log shows why the stop failed.
		corelog.LogErrorMessage(fmt.Sprintf("Failed to stop proxy %s: %s", proxy, err))
	}
}
Beispiel #7
0
// startProxy starts proxy by calling proxy.Start(). A failure is logged
// together with its cause and is not propagated to the caller.
func (manager *StateManager) startProxy(proxy *Proxy) {
	if err := proxy.Start(); err != nil {
		// Include the underlying error so the log shows why the start failed.
		corelog.LogErrorMessage(fmt.Sprintf("Failed to start proxy %s: %s", proxy, err))
	}
}
Beispiel #8
0
// clientServeLoop loops on a single client connected to the proxy and
// dispatches its requests.
//
// The connection is first checked against the per-client connection limit,
// keyed by the peer's IP; over-limit connections are closed and rejected.
// Otherwise the loop repeatedly reads a message header from the client,
// obtains a server connection, and proxies the message. After a mutation
// opcode the same server connection is kept for the follow-up request. The
// loop ends when the client disconnects or any read/proxy step fails; the
// deferred cleanup then closes the client connection and decrements the
// per-client counter.
func (p *Proxy) clientServeLoop(c net.Conn) {
	// Derive the key for the per-client limit. Use a checked type assertion:
	// a net.Conn is not guaranteed to carry a *net.TCPAddr, and an unchecked
	// assertion would panic this goroutine for any other connection type.
	// For TCP connections the result is unchanged (the bare IP).
	var remoteIP string
	if tcpAddr, ok := c.RemoteAddr().(*net.TCPAddr); ok {
		remoteIP = tcpAddr.IP.String()
	} else {
		remoteIP = c.RemoteAddr().String()
	}

	// enforce per-client max connection limit
	// NOTE(review): this early return does not call p.wg.Done(), whereas the
	// normal path does (in the deferred cleanup below). If the caller does
	// p.wg.Add(1) before starting this loop, rejected connections would leave
	// the WaitGroup unbalanced — confirm against the accept loop.
	if p.maxPerClientConnections.inc(remoteIP) {
		c.Close()
		stats.BumpSum(p.stats, "client.rejected.max.connections", 1)
		corelog.LogErrorMessage(fmt.Sprintf("rejecting client connection due to max connections limit: %s", remoteIP))
		return
	}

	// turn on TCP keep-alive and set it to the recommended period of 2 minutes
	// http://docs.mongodb.org/manual/faq/diagnostics/#faq-keepalive
	if conn, ok := c.(*net.TCPConn); ok {
		conn.SetKeepAlivePeriod(2 * time.Minute)
		conn.SetKeepAlive(true)
	}

	c = teeIf(fmt.Sprintf("client %s <=> %s", c.RemoteAddr(), p), c)
	stats.BumpSum(p.stats, "client.connected", 1)
	defer func() {
		p.wg.Done()
		if err := c.Close(); err != nil {
			corelog.LogError("error", err)
		}
		p.maxPerClientConnections.dec(remoteIP)
	}()

	var lastError LastError
	for {
		// Wait for the next request header from the client.
		h, err := p.idleClientReadHeader(c)
		if err != nil {
			if err != errNormalClose {
				corelog.LogError("error", err)
			}
			return
		}

		mpt := stats.BumpTime(p.stats, "message.proxy.time")
		serverConn, err := p.getServerConn()
		if err != nil {
			if err != errNormalClose {
				corelog.LogError("error", err)
			}
			return
		}

		scht := stats.BumpTime(p.stats, "server.conn.held.time")
		for {
			err := p.proxyMessage(h, c, serverConn, &lastError)
			if err != nil {
				// The server connection is discarded rather than released
				// after a failed proxy attempt.
				p.serverPool.Discard(serverConn)
				corelog.LogErrorMessage(fmt.Sprintf("Proxy message failed %s ", err))
				stats.BumpSum(p.stats, "message.proxy.error", 1)
				if ne, ok := err.(net.Error); ok && ne.Timeout() {
					stats.BumpSum(p.stats, "message.proxy.timeout", 1)
				}
				return
			}

			// One message was proxied, stop its timer.
			mpt.End()

			if !h.OpCode.IsMutation() {
				break
			}

			// If the operation we just performed was a mutation, we always make the
			// follow up request on the same server because it's possibly a getLastErr
			// call which expects this behavior.

			stats.BumpSum(p.stats, "message.with.mutation", 1)
			h, err = p.gleClientReadHeader(c)
			if err != nil {
				// Client did not make _any_ query within the GetLastErrorTimeout.
				// Return the server to the pool and go back to the outer loop.
				if err == errClientReadTimeout {
					break
				}
				// Prevent noise of normal client disconnects, but log if anything else.
				if err != errNormalClose {
					corelog.LogError("error", err)
				}
				// We need to return our server to the pool (it's still good as far
				// as we know).
				p.serverPool.Release(serverConn)
				return
			}

			// Successfully read message when waiting for the getLastError call.
			mpt = stats.BumpTime(p.stats, "message.proxy.time")
		}
		p.serverPool.Release(serverConn)
		scht.End()
		stats.BumpSum(p.stats, "message.proxy.success", 1)
	}
}
Beispiel #9
0
// HandleFailure logs an error message, bumps the
// "healthcheck.failed.panic" counter, and then panics. It never returns
// normally.
func (r *ReplicaSet) HandleFailure() {
	const panicReason = "failed healthchecks"

	corelog.LogErrorMessage("Crashing dvara due to consecutive failed healthchecks")
	r.Stats.BumpSum("healthcheck.failed.panic", 1)

	panic(panicReason)
}
Beispiel #10
0
// Errorf formats f with args using fmt.Sprintf and passes the resulting
// message to corelog.LogErrorMessage.
func (l *Logger) Errorf(f string, args ...interface{}) {
	message := fmt.Sprintf(f, args...)
	corelog.LogErrorMessage(message)
}