Example No. 1
func (manager *StateManager) Start() error {
	corelog.LogInfoMessage("starting manager")
	manager.Lock()
	defer manager.Unlock()
	var err error
	manager.currentReplicaSetState, err = manager.generateReplicaSetState()
	if err != nil {
		return stackerr.Newf("error starting statemanager, replicaset in flux: %v", err)
	}
	healthyAddrs := manager.currentReplicaSetState.Addrs()

	// Ensure we have at least one healthy address.
	if len(healthyAddrs) == 0 {
		return stackerr.Newf("no healthy primaries or secondaries: %s", manager.replicaSet.Addrs)
	}

	manager.addProxies(healthyAddrs...)

	for _, proxy := range manager.proxies {
		go manager.startProxy(proxy)
	}
	manager.refreshTime = time.Now()
	return nil
}
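For reference, Start above touches several StateManager fields that are defined elsewhere in dvara. The sketch below only collects plausible declarations inferred from how the fields are used in these examples; the value types and the full field set are assumptions, not the upstream definition.

// Illustrative sketch of the fields the methods in these examples rely on;
// the real dvara struct contains more than this.
type StateManager struct {
	sync.Mutex
	replicaSet             *ReplicaSet       // configuration, including the seed Addrs
	currentReplicaSetState *ReplicaSetState  // last observed replica set topology
	proxies                map[string]*Proxy // keyed by proxy listen address
	proxyToReal            map[string]string // proxy listen address -> mongo address
	realToProxy            map[string]string // mongo address -> proxy listen address
	refreshTime            time.Time         // when the state was last refreshed
}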
Example No. 2
func (manager *StateManager) removeProxy(proxy *Proxy) {
	if _, ok := manager.proxyToReal[proxy.ProxyAddr]; !ok {
		corelog.LogErrorMessage(fmt.Sprintf("proxy %s does not exist in ReplicaSet", proxy.ProxyAddr))
	}
	if _, ok := manager.realToProxy[proxy.MongoAddr]; !ok {
		corelog.LogErrorMessage(fmt.Sprintf("mongo %s does not exist in ReplicaSet", proxy.ProxyAddr))
	}
	corelog.LogInfoMessage(fmt.Sprintf("removed %s", proxy))
	delete(manager.proxyToReal, proxy.ProxyAddr)
	delete(manager.realToProxy, proxy.MongoAddr)
	delete(manager.proxies, proxy.ProxyAddr)
}
Example No. 3
func (manager *StateManager) addProxy(proxy *Proxy) (*Proxy, error) {
	if _, ok := manager.proxyToReal[proxy.ProxyAddr]; ok {
		return nil, fmt.Errorf("proxy %s already used in ReplicaSet", proxy.ProxyAddr)
	}
	if _, ok := manager.realToProxy[proxy.MongoAddr]; ok {
		return nil, fmt.Errorf("mongo %s already exists in ReplicaSet", proxy.MongoAddr)
	}
	corelog.LogInfoMessage(fmt.Sprintf("added %s", proxy))
	manager.proxyToReal[proxy.ProxyAddr] = proxy.MongoAddr
	manager.realToProxy[proxy.MongoAddr] = proxy.ProxyAddr
	manager.proxies[proxy.ProxyAddr] = proxy
	return proxy, nil
}
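A minimal usage sketch of the two bookkeeping methods above, assuming the manager's three maps have already been initialized; the function name and the addresses are purely illustrative.

func exampleAddRemove(manager *StateManager) error {
	// Hypothetical proxy pairing a local listen address with a mongo address.
	p := &Proxy{ProxyAddr: "127.0.0.1:6000", MongoAddr: "10.0.0.5:27017"}
	if _, err := manager.addProxy(p); err != nil {
		return err // a second proxy reusing either address is rejected here
	}
	manager.removeProxy(p) // drops the entries from all three maps again
	return nil
}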
Example No. 4
// proxyMessage proxies a message, possibly its response, and possibly a
// follow-up call.
func (p *Proxy) proxyMessage(
	h *messageHeader,
	client net.Conn,
	server net.Conn,
	lastError *LastError,
) error {
	deadline := time.Now().Add(p.ReplicaSet.MessageTimeout)
	server.SetDeadline(deadline)
	client.SetDeadline(deadline)

	// OpQuery may need to be transformed and needs special handling in order to
	// make the proxy transparent.
	if h.OpCode == OpQuery {
		stats.BumpSum(p.stats, "message.with.response", 1)
		return p.ReplicaSet.ProxyQuery.Proxy(h, client, server, lastError)
	}

	// Anything besides a getLastError call (which requires an OpQuery) resets
	// the lastError.
	if lastError.Exists() {
		corelog.LogInfoMessage("reset getLastError cache")
		lastError.Reset()
	}

	// For other Ops we proxy the header & raw body over.
	if err := h.WriteTo(server); err != nil {
		corelog.LogError("error", err)
		return err
	}

	if _, err := io.CopyN(server, client, int64(h.MessageLength-headerLen)); err != nil {
		corelog.LogError("error", err)
		return err
	}

	// For Ops with responses we proxy the raw response message over.
	if h.OpCode.HasResponse() {
		stats.BumpSum(p.stats, "message.with.response", 1)
		if err := copyMessage(client, server); err != nil {
			corelog.LogError("error", err)
			return err
		}
	}

	return nil
}
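proxyMessage and the query path in the next example both call copyMessage, which is not included in this listing. A minimal sketch, assuming the usual layout of a header followed by MessageLength-headerLen body bytes and the readHeader helper seen later in these examples:

// copyMessage forwards one wire-protocol message from src to dst: read the
// header, write it out, then stream the remaining body bytes verbatim.
// Sketch only; the upstream helper may differ.
func copyMessage(dst io.Writer, src io.Reader) error {
	h, err := readHeader(src)
	if err != nil {
		return err
	}
	if err := h.WriteTo(dst); err != nil {
		return err
	}
	_, err = io.CopyN(dst, src, int64(h.MessageLength-headerLen))
	return err
}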
Example No. 5
// Proxy proxies an OpQuery and a corresponding response.
func (p *ProxyQuery) Proxy(
	h *messageHeader,
	client io.ReadWriter,
	server io.ReadWriter,
	lastError *LastError,
) error {

	// https://github.com/mongodb/mongo/search?q=lastError.disableForCommand
	// Shows the logic we need to be in sync with. Unfortunately it isn't a
	// simple check to determine this, and may change underneath us at the mongo
	// layer.
	resetLastError := true

	parts := [][]byte{h.ToWire()}

	var flags [4]byte
	if _, err := io.ReadFull(client, flags[:]); err != nil {
		corelog.LogError("error", err)
		return err
	}
	parts = append(parts, flags[:])

	fullCollectionName, err := readCString(client)
	if err != nil {
		corelog.LogError("error", err)
		return err
	}
	parts = append(parts, fullCollectionName)

	var rewriter responseRewriter
	if *proxyAllQueries || bytes.HasSuffix(fullCollectionName, cmdCollectionSuffix) {
		var twoInt32 [8]byte
		if _, err := io.ReadFull(client, twoInt32[:]); err != nil {
			corelog.LogError("error", err)
			return err
		}
		parts = append(parts, twoInt32[:])

		queryDoc, err := readDocument(client)
		if err != nil {
			corelog.LogError("error", err)
			return err
		}
		parts = append(parts, queryDoc)

		var q bson.D
		if err := bson.Unmarshal(queryDoc, &q); err != nil {
			corelog.LogError("error", err)
			return err
		}

		if hasKey(q, "getLastError") {
			return p.GetLastErrorRewriter.Rewrite(
				h,
				parts,
				client,
				server,
				lastError,
			)
		}

		if hasKey(q, "isMaster") {
			rewriter = p.IsMasterResponseRewriter
		}
		if bytes.Equal(adminCollectionName, fullCollectionName) && hasKey(q, "replSetGetStatus") {
			rewriter = p.ReplSetGetStatusResponseRewriter
		}

		if rewriter != nil {
			// If forShell is specified, we don't want to reset the last error. See
			// comment above around resetLastError for details.
			resetLastError = hasKey(q, "forShell")
		}
	}

	if resetLastError && lastError.Exists() {
		corelog.LogInfoMessage("reset getLastError cache")
		lastError.Reset()
	}

	var written int
	for _, b := range parts {
		n, err := server.Write(b)
		if err != nil {
			corelog.LogError("error", err)
			return err
		}
		written += n
	}

	pending := int64(h.MessageLength) - int64(written)
	if _, err := io.CopyN(server, client, pending); err != nil {
		corelog.LogError("error", err)
		return err
	}

	if rewriter != nil {
		if err := rewriter.Rewrite(client, server); err != nil {
			return err
		}
		return nil
	}

	if err := copyMessage(client, server); err != nil {
		corelog.LogError("error", err)
		return err
	}

	return nil
}
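The command detection above goes through hasKey, another helper that is not part of this listing. A minimal sketch, assuming a plain top-level key match on the decoded bson.D (mgo's bson.DocElem keeps the key in its Name field):

// hasKey reports whether the decoded query document contains the named
// top-level element. Illustrative sketch, not the upstream implementation.
func hasKey(d bson.D, key string) bool {
	for _, elem := range d {
		if elem.Name == key {
			return true
		}
	}
	return false
}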
Example No. 6
// Rewrite handles getLastError requests.
func (r *GetLastErrorRewriter) Rewrite(
	h *messageHeader,
	parts [][]byte,
	client io.ReadWriter,
	server io.ReadWriter,
	lastError *LastError,
) error {

	if !lastError.Exists() {
		// We're going to be performing a real getLastError query and caching the
		// response.
		var written int
		for _, b := range parts {
			n, err := server.Write(b)
			if err != nil {
				corelog.LogError("error", err)
				return err
			}
			written += n
		}

		pending := int64(h.MessageLength) - int64(written)
		if _, err := io.CopyN(server, client, pending); err != nil {
			corelog.LogError("error", err)
			return err
		}

		var err error
		if lastError.header, err = readHeader(server); err != nil {
			corelog.LogError("error", err)
			return err
		}
		pending = int64(lastError.header.MessageLength - headerLen)
		if _, err = io.CopyN(&lastError.rest, server, pending); err != nil {
			corelog.LogError("error", err)
			return err
		}
		corelog.LogInfoMessage(fmt.Sprintf("caching new getLastError response: %s", lastError.rest.Bytes()))
	} else {
		// We need to discard the query's remaining bytes from the client before
		// we send back our cached response.
		var written int
		for _, b := range parts {
			written += len(b)
		}
		pending := int64(h.MessageLength) - int64(written)
		if _, err := io.CopyN(ioutil.Discard, client, pending); err != nil {
			corelog.LogError("error", err)
			return err
		}
		// Modify and send the cached response for this request.
		lastError.header.ResponseTo = h.RequestID
		corelog.LogInfoMessage("using cached getLastError response: %s", lastError.rest.Bytes())
	}

	if err := lastError.header.WriteTo(client); err != nil {
		corelog.LogError("error", err)
		return err
	}
	if _, err := client.Write(lastError.rest.Bytes()); err != nil {
		corelog.LogError("error", err)
		return err
	}

	return nil
}
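The rewriter reads and writes a LastError cache whose definition is also outside this listing. The fields it touches suggest a shape roughly like the sketch below; anything beyond header and rest (including the expiry handling implied by GetLastErrorTimeout) is an assumption.

// LastError caches the reply to a real getLastError round trip so repeated
// getLastError calls can be answered by the proxy. Sketch only.
type LastError struct {
	header *messageHeader // header of the cached reply; nil means nothing cached
	rest   bytes.Buffer   // body of the cached reply, everything after the header
}

// Exists reports whether a reply is currently cached.
func (l *LastError) Exists() bool { return l.header != nil }

// Reset drops the cached reply.
func (l *LastError) Reset() {
	l.header = nil
	l.rest.Reset()
}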
Example No. 7
func (l *Logger) Debugf(f string, args ...interface{}) {
	corelog.LogInfoMessage(fmt.Sprintf(f, args...))
}
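Debugf exists so the startstop calls in the next example can log through corelog. If the logger interface passed to startstop also expects an Errorf method (an assumption here, not confirmed by this listing), a matching counterpart might look like:

// Errorf mirrors Debugf but routes to the error log. Assumed counterpart; the
// wrapper may only need Debugf in practice.
func (l *Logger) Errorf(f string, args ...interface{}) {
	corelog.LogErrorMessage(fmt.Sprintf(f, args...))
}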
Example No. 8
func Main() error {
	addrs := flag.String("addrs", "localhost:27017", "comma-separated list of mongo addresses")
	clientIdleTimeout := flag.Duration("client_idle_timeout", 60*time.Minute, "idle timeout for client connections")
	getLastErrorTimeout := flag.Duration("get_last_error_timeout", time.Minute, "timeout for getLastError pinning")
	listenAddr := flag.String("listen", "127.0.0.1", "address to listen on, for example 127.0.0.1 to accept connections only from this machine, or 0.0.0.0 to accept connections from other machines")
	maxConnections := flag.Uint("max_connections", 100, "maximum number of connections per mongo")
	maxPerClientConnections := flag.Uint("max_per_client_connections", 100, "maximum number of connections from a single client")
	messageTimeout := flag.Duration("message_timeout", 2*time.Minute, "timeout for one message to be proxied")
	password := flag.String("password", "", "mongodb password")
	portEnd := flag.Int("port_end", 6010, "end of port range")
	portStart := flag.Int("port_start", 6000, "start of port range")
	serverClosePoolSize := flag.Uint("server_close_pool_size", 1, "number of goroutines that will handle closing server connections.")
	serverIdleTimeout := flag.Duration("server_idle_timeout", 60*time.Minute, "duration after which a server connection will be considered idle")
	username := flag.String("username", "", "mongodb username")
	metricsAddress := flag.String("metrics", "127.0.0.1:8125", "UDP address to send metrics to datadog, default is 127.0.0.1:8125")
	replicaName := flag.String("replica_name", "", "Replica name, used in metrics and logging, default is empty")
	replicaSetName := flag.String("replica_set_name", "", "Replica set name, used to filter hosts running other replica sets")
	healthCheckInterval := flag.Duration("healthcheckinterval", 5*time.Second, "How often to run the health check")
	failedHealthCheckThreshold := flag.Uint("failedhealthcheckthreshold", 3, "How many failed checks before a restart")

	flag.Parse()
	statsClient := NewDataDogStatsDClient(*metricsAddress, "replica:"+*replicaName)

	replicaSet := dvara.ReplicaSet{
		Addrs:                   *addrs,
		ClientIdleTimeout:       *clientIdleTimeout,
		GetLastErrorTimeout:     *getLastErrorTimeout,
		ListenAddr:              *listenAddr,
		MaxConnections:          *maxConnections,
		MaxPerClientConnections: *maxPerClientConnections,
		MessageTimeout:          *messageTimeout,
		Password:                *password,
		PortEnd:                 *portEnd,
		PortStart:               *portStart,
		ServerClosePoolSize:     *serverClosePoolSize,
		ServerIdleTimeout:       *serverIdleTimeout,
		Username:                *username,
		Name:                    *replicaSetName,
	}
	stateManager := dvara.NewStateManager(&replicaSet)

	// Actual logger
	corelog.SetupLogFmtLoggerTo(os.Stderr)
	corelog.SetStandardFields("replicaset", *replicaName)
	corelog.UseTimestamp(true)

	// Log command line args
	startupOptions := []interface{}{}
	flag.CommandLine.VisitAll(func(flag *flag.Flag) {
		if flag.Name != "password" {
			startupOptions = append(startupOptions, flag.Name, flag.Value.String())
		}
	})
	corelog.LogInfoMessage("starting with command line arguments", startupOptions...)

	// Logger wrapper passed to startstop
	log := Logger{}

	var graph inject.Graph
	err := graph.Provide(
		&inject.Object{Value: &replicaSet},
		&inject.Object{Value: &statsClient},
		&inject.Object{Value: stateManager},
	)
	if err != nil {
		return err
	}
	if err := graph.Populate(); err != nil {
		return err
	}
	objects := graph.Objects()

	hc := &dvara.HealthChecker{
		HealthCheckInterval:        *healthCheckInterval,
		FailedHealthCheckThreshold: *failedHealthCheckThreshold,
	}

	if err := startstop.Start(objects, &log); err != nil {
		return err
	}
	defer startstop.Stop(objects, &log)

	syncChan := make(chan struct{})
	go stateManager.KeepSynchronized(syncChan)
	go hc.HealthCheck(&replicaSet, syncChan)

	ch := make(chan os.Signal, 2)
	signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT)
	<-ch
	signal.Stop(ch)
	return nil
}
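Main returns an error instead of exiting, so a thin entry point is needed somewhere in the binary. A minimal sketch; the exit code and logging choice are assumptions:

func main() {
	if err := Main(); err != nil {
		corelog.LogErrorMessage(err.Error())
		os.Exit(1)
	}
}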